diff --git "a/profile_trace/iteration_11776/rank5_trace.json" "b/profile_trace/iteration_11776/rank5_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_11776/rank5_trace.json" @@ -0,0 +1,157273 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 5, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "88E4535CEF194E02A473FF0092446B02", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141790275.527, "dur": 122.578, + "args": { + "External id": 502273,"Record function id": 0, "Sequence number": 5516236, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141790300.032, "dur": 87.970, + "args": { + "External id": 502274,"Sequence number": 5516236, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2338711, "tid": 2379440, "ts": 6274141790300.032, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2379440, + "ts": 6274141790308.739, "dur": 77.373, + "args": { + "External id": 502275,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141790413.450, "dur": 246.341, + "args": { + "External id": 502276,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141790470.580, "dur": 84.493, + "args": { + "External id": 502277,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338711, "tid": 2379440, + "ts": 6274141790501.782, "dur": 42.473, + "args": { + "External id": 502278,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141790559.902, "dur": 1.754, + "args": { + "External id": 502279,"Sequence number": 5516235, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2338711, "tid": 2379440, "ts": 6274141790559.902, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141790565.808, "dur": 87.798, + "args": { + "External id": 502280,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141790576.933, "dur": 76.038, + "args": { + "External id": 502281,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141790608.982, "dur": 4.625, + "args": { + "External id": 502282,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141790672.826, "dur": 41434.469, + "args": { + "External id": 502283,"Record function id": 0, "Sequence number": 5516233, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141790675.267, "dur": 41411.996, + "args": { + "External id": 502284,"Sequence number": 5516233, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 2338711, "tid": 2379440, "ts": 6274141790675.267, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141790723.313, "dur": 5.240, + "args": { + "External id": 502285,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141790735.627, "dur": 41032.647, + "args": { + "External id": 502286,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141790737.899, "dur": 41029.874, + "args": { + "External id": 502287,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141790743.225, "dur": 7.759, + "args": { + "External id": 502288,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141790752.739, "dur": 41012.965, + "args": { + "External id": 502289,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274141831774.321, "dur": 0.538, + "args": { + "External id": 502290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141831777.314, "dur": 3.683, + "args": { + "External id": 502291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141831779.179, "dur": 1.478, + "args": { + "External id": 502292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274141831788.313, "dur": 33.313, + "args": { + "External id": 502293,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274141831831.211, "dur": 48.719, + "args": { + "External id": 502294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274141831833.221, "dur": 46.509, + "args": { + "External id": 502295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274141831834.870, "dur": 44.521, + "args": { + "External id": 502296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832126.110, "dur": 22.698, + "args": { + "External id": 502297,"Record function id": 0, "Sequence number": 5516232, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832128.347, "dur": 17.181, + "args": { + "External id": 502298,"Sequence number": 5516232, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 2338711, "tid": 2379440, "ts": 6274141832128.347, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141832134.080, "dur": 11.192, + "args": { + "External id": 502299,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141832138.811, "dur": 6.240, + "args": { + "External id": 502300,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832155.331, "dur": 113.900, + "args": { + "External id": 502301,"Record function id": 0, "Sequence number": 5516231, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832156.298, "dur": 105.508, + "args": { + "External id": 502302,"Sequence number": 5516231, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 2338711, "tid": 2379440, "ts": 6274141832156.298, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141832160.026, "dur": 101.253, + "args": { + "External id": 502303,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141832165.257, "dur": 44.655, + "args": { + "External id": 502304,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141832171.080, "dur": 7.384, + "args": { + "External id": 502305,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832180.095, "dur": 29.542, + "args": { + "External id": 502306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832185.619, "dur": 23.471, + "args": { + "External id": 502307,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141832212.249, "dur": 7.742, + "args": { + "External id": 502308,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141832215.306, "dur": 4.258, + "args": { + "External id": 502309,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832221.538, "dur": 38.854, + "args": { + "External id": 502310,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832273.875, "dur": 61.154, + "args": { + "External id": 502311,"Record function id": 0, "Sequence number": 5516230, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832274.982, "dur": 56.734, + "args": { + "External id": 502312,"Sequence number": 5516230, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 2338711, "tid": 2379440, "ts": 6274141832274.982, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141832278.095, "dur": 53.325, + "args": { + "External id": 502313,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141832283.776, "dur": 19.655, + "args": { + "External id": 502314,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141832284.966, "dur": 2.879, + "args": { + "External id": 502315,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832288.815, "dur": 14.355, + "args": { + "External id": 502316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832291.749, "dur": 11.068, + "args": { + "External id": 502317,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141832305.138, "dur": 8.035, + "args": { + "External id": 502318,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141832311.203, "dur": 1.283, + "args": { + "External id": 502319,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832313.965, "dur": 16.899, + "args": { + "External id": 502320,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832338.826, "dur": 221.241, + "args": { + "External id": 502321,"Record function id": 0, "Sequence number": 5516229, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832339.898, "dur": 216.306, + "args": { + "External id": 502322,"Sequence number": 5516229, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 2338711, "tid": 2379440, "ts": 6274141832339.898, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141832344.112, "dur": 211.632, + "args": { + "External id": 502323,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141832348.095, "dur": 16.137, + "args": { + "External id": 502324,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141832349.048, "dur": 2.580, + "args": { + "External id": 502325,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832352.217, "dur": 11.754, + "args": { + "External id": 502326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832353.191, "dur": 10.426, + "args": { + "External id": 502327,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141832365.333, "dur": 2.046, + "args": { + "External id": 502328,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141832366.533, "dur": 0.665, + "args": { + "External id": 502329,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832370.559, "dur": 184.105, + "args": { + "External id": 502330,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832564.556, "dur": 126.066, + "args": { + "External id": 502331,"Record function id": 0, "Sequence number": 5516228, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832565.738, "dur": 119.574, + "args": { + "External id": 502332,"Sequence number": 5516228, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 2338711, "tid": 2379440, "ts": 6274141832565.738, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141832567.242, "dur": 117.608, + "args": { + "External id": 502333,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141832570.703, "dur": 37.368, + "args": { + "External id": 502334,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141832571.474, "dur": 2.153, + "args": { + "External id": 502335,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832574.291, "dur": 33.455, + "args": { + "External id": 502336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832575.461, "dur": 31.368, + "args": { + "External id": 502337,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141832610.134, "dur": 6.209, + "args": { + "External id": 502338,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141832614.697, "dur": 1.321, + "args": { + "External id": 502339,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832617.570, "dur": 66.131, + "args": { + "External id": 502340,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832700.117, "dur": 42.792, + "args": { + "External id": 502341,"Record function id": 0, "Sequence number": 5516227, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141832701.844, "dur": 1.245, + "args": { + "External id": 502342,"Sequence number": 5516227, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 2338711, "tid": 2379440, "ts": 6274141832701.844, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141832706.308, "dur": 33.034, + "args": { + "External id": 502343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141832709.102, "dur": 29.740, + "args": { + "External id": 502344,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141832718.250, "dur": 2.210, + "args": { + "External id": 502345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141832747.975, "dur": 2536.641, + "args": { + "External id": 502346,"Record function id": 0, "Sequence number": 5516225, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141832749.551, "dur": 2491.690, + "args": { + "External id": 502347,"Sequence number": 5516225, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 2338711, "tid": 2379440, "ts": 6274141832749.551, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141832794.324, "dur": 4.189, + "args": { + "External id": 502348,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141832801.447, "dur": 2166.006, + "args": { + "External id": 502349,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141832803.222, "dur": 2163.906, + "args": { + "External id": 502350,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141832806.589, "dur": 6.269, + "args": { + "External id": 502351,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141832813.782, "dur": 2151.511, + "args": { + "External id": 502352,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274141834971.839, "dur": 0.439, + "args": { + "External id": 502353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141834973.935, "dur": 5.823, + "args": { + "External id": 502354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141834978.631, "dur": 0.927, + "args": { + "External id": 502355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274141834985.514, "dur": 25.588, + "args": { + "External id": 502356,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274141835017.447, "dur": 78.913, + "args": { + "External id": 502357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274141835018.923, "dur": 77.224, + "args": { + "External id": 502358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274141835020.457, "dur": 75.055, + "args": { + "External id": 502359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835257.976, "dur": 21.964, + "args": { + "External id": 502360,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835298.958, "dur": 18.698, + "args": { + "External id": 502361,"Record function id": 0, "Sequence number": 5516224, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835301.000, "dur": 13.727, + "args": { + "External id": 502362,"Sequence number": 5516224, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 2338711, "tid": 2379440, "ts": 6274141835301.000, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141835307.996, "dur": 6.473, + "args": { + "External id": 502363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141835309.481, "dur": 4.746, + "args": { + "External id": 502364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835321.181, "dur": 80.884, + "args": { + "External id": 502365,"Record function id": 0, "Sequence number": 5516223, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835322.129, "dur": 74.768, + "args": { + "External id": 502366,"Sequence number": 5516223, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 2338711, "tid": 2379440, "ts": 6274141835322.129, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141835324.659, "dur": 71.797, + "args": { + "External id": 502367,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141835332.903, "dur": 23.715, + "args": { + "External id": 502368,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141835335.303, "dur": 4.365, + "args": { + "External id": 502369,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835340.732, "dur": 15.650, + "args": { + "External id": 502370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835342.783, "dur": 13.212, + "args": { + "External id": 502371,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141835358.427, "dur": 4.084, + "args": { + "External id": 502372,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141835360.916, "dur": 1.328, + "args": { + "External id": 502373,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835363.727, "dur": 31.777, + "args": { + "External id": 502374,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835406.578, "dur": 56.847, + "args": { + "External id": 502375,"Record function id": 0, "Sequence number": 5516222, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835407.550, "dur": 52.658, + "args": { + "External id": 502376,"Sequence number": 5516222, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 2338711, "tid": 2379440, "ts": 6274141835407.550, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141835412.086, "dur": 47.837, + "args": { + "External id": 502377,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141835414.055, "dur": 20.709, + "args": { + "External id": 502378,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141835415.043, "dur": 2.260, + "args": { + "External id": 502379,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835420.533, "dur": 13.957, + "args": { + "External id": 502380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835421.270, "dur": 12.829, + "args": { + "External id": 502381,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141835436.154, "dur": 7.763, + "args": { + "External id": 502382,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141835442.227, "dur": 1.114, + "args": { + "External id": 502383,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835444.554, "dur": 14.806, + "args": { + "External id": 502384,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835470.044, "dur": 152.116, + "args": { + "External id": 502385,"Record function id": 0, "Sequence number": 5516221, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835471.009, "dur": 144.279, + "args": { + "External id": 502386,"Sequence number": 5516221, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 2338711, "tid": 2379440, "ts": 6274141835471.009, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141835472.912, "dur": 141.755, + "args": { + "External id": 502387,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141835474.149, "dur": 16.988, + "args": { + "External id": 502388,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141835475.027, "dur": 1.887, + "args": { + "External id": 502389,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835480.131, "dur": 10.727, + "args": { + "External id": 502390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835481.294, "dur": 9.163, + "args": { + "External id": 502391,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141835492.114, "dur": 4.408, + "args": { + "External id": 502392,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141835495.845, "dur": 0.542, + "args": { + "External id": 502393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835497.632, "dur": 115.434, + "args": { + "External id": 502394,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835631.494, "dur": 119.953, + "args": { + "External id": 502395,"Record function id": 0, "Sequence number": 5516220, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835633.285, "dur": 96.334, + "args": { + "External id": 502396,"Sequence number": 5516220, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 2338711, "tid": 2379440, "ts": 6274141835633.285, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141835638.548, "dur": 90.636, + "args": { + "External id": 502397,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141835640.390, "dur": 25.627, + "args": { + "External id": 502398,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141835641.801, "dur": 3.715, + "args": { + "External id": 502399,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835646.623, "dur": 19.074, + "args": { + "External id": 502400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835650.578, "dur": 14.745, + "args": { + "External id": 502401,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141835667.075, "dur": 2.611, + "args": { + "External id": 502402,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141835668.652, "dur": 0.822, + "args": { + "External id": 502403,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835670.663, "dur": 57.903, + "args": { + "External id": 502404,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835734.025, "dur": 15.864, + "args": { + "External id": 502405,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835756.831, "dur": 38.768, + "args": { + "External id": 502406,"Record function id": 0, "Sequence number": 5516219, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141835758.067, "dur": 1.398, + "args": { + "External id": 502407,"Sequence number": 5516219, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 2338711, "tid": 2379440, "ts": 6274141835758.067, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141835761.898, "dur": 30.732, + "args": { + "External id": 502408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141835764.217, "dur": 28.016, + "args": { + "External id": 502409,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141835774.643, "dur": 0.698, + "args": { + "External id": 502410,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141835800.901, "dur": 3571.511, + "args": { + "External id": 502411,"Record function id": 0, "Sequence number": 5516217, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141835804.590, "dur": 3532.722, + "args": { + "External id": 502412,"Sequence number": 5516217, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 2338711, "tid": 2379440, "ts": 6274141835804.590, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141835843.947, "dur": 3.078, + "args": { + "External id": 502413,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141835849.896, "dur": 3253.386, + "args": { + "External id": 502414,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141835851.822, "dur": 3251.134, + "args": { + "External id": 502415,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141835854.503, "dur": 4.060, + "args": { + "External id": 502416,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141835862.275, "dur": 3239.018, + "args": { + "External id": 502417,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274141839108.158, "dur": 0.404, + "args": { + "External id": 502418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839110.116, "dur": 2.484, + "args": { + "External id": 502419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839111.486, "dur": 0.960, + "args": { + "External id": 502420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274141839118.081, "dur": 27.287, + "args": { + "External id": 502421,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274141839154.108, "dur": 44.174, + "args": { + "External id": 502422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274141839155.876, "dur": 42.155, + "args": { + "External id": 502423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274141839157.368, "dur": 40.359, + "args": { + "External id": 502424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839351.120, "dur": 17.380, + "args": { + "External id": 502425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839386.016, "dur": 18.106, + "args": { + "External id": 502426,"Record function id": 0, "Sequence number": 5516216, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839390.948, "dur": 10.709, + "args": { + "External id": 502427,"Sequence number": 5516216, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 2338711, "tid": 2379440, "ts": 6274141839390.948, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141839395.445, "dur": 5.938, + "args": { + "External id": 502428,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141839396.842, "dur": 4.265, + "args": { + "External id": 502429,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839407.677, "dur": 79.024, + "args": { + "External id": 502430,"Record function id": 0, "Sequence number": 5516215, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839408.693, "dur": 73.467, + "args": { + "External id": 502431,"Sequence number": 5516215, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 2338711, "tid": 2379440, "ts": 6274141839408.693, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141839413.921, "dur": 67.830, + "args": { + "External id": 502432,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141839416.933, "dur": 22.374, + "args": { + "External id": 502433,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141839418.738, "dur": 3.753, + "args": { + "External id": 502434,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839423.665, "dur": 15.367, + "args": { + "External id": 502435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839425.627, "dur": 13.004, + "args": { + "External id": 502436,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141839441.123, "dur": 6.370, + "args": { + "External id": 502437,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141839445.844, "dur": 1.393, + "args": { + "External id": 502438,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839451.034, "dur": 29.691, + "args": { + "External id": 502439,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839490.920, "dur": 51.502, + "args": { + "External id": 502440,"Record function id": 0, "Sequence number": 5516214, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839491.881, "dur": 48.015, + "args": { + "External id": 502441,"Sequence number": 5516214, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 2338711, "tid": 2379440, "ts": 6274141839491.881, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141839493.903, "dur": 45.763, + "args": { + "External id": 502442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141839496.060, "dur": 15.727, + "args": { + "External id": 502443,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141839497.026, "dur": 2.650, + "args": { + "External id": 502444,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839500.413, "dur": 11.112, + "args": { + "External id": 502445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839501.315, "dur": 9.846, + "args": { + "External id": 502446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141839513.266, "dur": 10.039, + "args": { + "External id": 502447,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141839521.614, "dur": 1.126, + "args": { + "External id": 502448,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839523.996, "dur": 14.984, + "args": { + "External id": 502449,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839546.265, "dur": 170.498, + "args": { + "External id": 502450,"Record function id": 0, "Sequence number": 5516213, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839547.645, "dur": 163.222, + "args": { + "External id": 502451,"Sequence number": 5516213, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 2338711, "tid": 2379440, "ts": 6274141839547.645, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141839549.612, "dur": 160.693, + "args": { + "External id": 502452,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141839550.933, "dur": 19.487, + "args": { + "External id": 502453,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141839551.985, "dur": 4.157, + "args": { + "External id": 502454,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839556.731, "dur": 13.432, + "args": { + "External id": 502455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839557.547, "dur": 12.243, + "args": { + "External id": 502456,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141839571.417, "dur": 5.786, + "args": { + "External id": 502457,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141839576.503, "dur": 0.514, + "args": { + "External id": 502458,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839580.826, "dur": 127.447, + "args": { + "External id": 502459,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839724.956, "dur": 111.076, + "args": { + "External id": 502460,"Record function id": 0, "Sequence number": 5516212, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839726.114, "dur": 89.718, + "args": { + "External id": 502461,"Sequence number": 5516212, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 2338711, "tid": 2379440, "ts": 6274141839726.114, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141839730.838, "dur": 84.747, + "args": { + "External id": 502462,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141839732.250, "dur": 19.957, + "args": { + "External id": 502463,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141839733.303, "dur": 3.067, + "args": { + "External id": 502464,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839737.346, "dur": 14.585, + "args": { + "External id": 502465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839738.655, "dur": 12.924, + "args": { + "External id": 502466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141839753.407, "dur": 4.087, + "args": { + "External id": 502467,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141839756.669, "dur": 0.642, + "args": { + "External id": 502468,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839761.382, "dur": 53.491, + "args": { + "External id": 502469,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839820.312, "dur": 14.240, + "args": { + "External id": 502470,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839843.279, "dur": 35.071, + "args": { + "External id": 502471,"Record function id": 0, "Sequence number": 5516211, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141839844.345, "dur": 1.193, + "args": { + "External id": 502472,"Sequence number": 5516211, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 2338711, "tid": 2379440, "ts": 6274141839844.345, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141839847.829, "dur": 25.211, + "args": { + "External id": 502473,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141839850.212, "dur": 22.332, + "args": { + "External id": 502474,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141839855.543, "dur": 0.531, + "args": { + "External id": 502475,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141839882.839, "dur": 3578.338, + "args": { + "External id": 502476,"Record function id": 0, "Sequence number": 5516210, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141839895.073, "dur": 3531.373, + "args": { + "External id": 502477,"Sequence number": 5516210, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 2338711, "tid": 2379440, "ts": 6274141839895.073, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141839928.965, "dur": 3.083, + "args": { + "External id": 502478,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141839934.965, "dur": 3262.927, + "args": { + "External id": 502479,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141839936.712, "dur": 3260.759, + "args": { + "External id": 502480,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141839942.236, "dur": 3.905, + "args": { + "External id": 502481,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141839946.906, "dur": 3248.855, + "args": { + "External id": 502482,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274141843202.182, "dur": 0.429, + "args": { + "External id": 502483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843206.884, "dur": 2.853, + "args": { + "External id": 502484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843208.585, "dur": 0.988, + "args": { + "External id": 502485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274141843215.075, "dur": 23.904, + "args": { + "External id": 502486,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274141843244.831, "dur": 47.760, + "args": { + "External id": 502487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274141843246.214, "dur": 46.175, + "args": { + "External id": 502488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274141843247.995, "dur": 44.138, + "args": { + "External id": 502489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843439.558, "dur": 16.998, + "args": { + "External id": 502490,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141843477.361, "dur": 14.059, + "args": { + "External id": 502491,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141843480.139, "dur": 9.562, + "args": { + "External id": 502492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141843483.476, "dur": 5.210, + "args": { + "External id": 502493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141843484.795, "dur": 3.713, + "args": { + "External id": 502494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843495.258, "dur": 18.243, + "args": { + "External id": 502495,"Record function id": 0, "Sequence number": 5516209, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843499.346, "dur": 11.975, + "args": { + "External id": 502496,"Sequence number": 5516209, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 2338711, "tid": 2379440, "ts": 6274141843499.346, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141843503.382, "dur": 7.640, + "args": { + "External id": 502497,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141843507.098, "dur": 3.697, + "args": { + "External id": 502498,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843516.835, "dur": 98.334, + "args": { + "External id": 502499,"Record function id": 0, "Sequence number": 5516208, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843517.712, "dur": 89.817, + "args": { + "External id": 502500,"Sequence number": 5516208, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 2338711, "tid": 2379440, "ts": 6274141843517.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141843519.884, "dur": 87.169, + "args": { + "External id": 502501,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141843522.927, "dur": 26.395, + "args": { + "External id": 502502,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141843524.990, "dur": 3.932, + "args": { + "External id": 502503,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843530.190, "dur": 18.853, + "args": { + "External id": 502504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843532.028, "dur": 16.479, + "args": { + "External id": 502505,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141843551.062, "dur": 6.347, + "args": { + "External id": 502506,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141843555.829, "dur": 1.200, + "args": { + "External id": 502507,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843561.086, "dur": 44.606, + "args": { + "External id": 502508,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843623.585, "dur": 68.177, + "args": { + "External id": 502509,"Record function id": 0, "Sequence number": 5516207, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843625.389, "dur": 63.470, + "args": { + "External id": 502510,"Sequence number": 5516207, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 2338711, "tid": 2379440, "ts": 6274141843625.389, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141843628.579, "dur": 59.922, + "args": { + "External id": 502511,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141843631.668, "dur": 28.888, + "args": { + "External id": 502512,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141843638.141, "dur": 4.788, + "args": { + "External id": 502513,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843644.048, "dur": 16.200, + "args": { + "External id": 502514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843645.778, "dur": 13.933, + "args": { + "External id": 502515,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141843662.081, "dur": 8.516, + "args": { + "External id": 502516,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141843668.705, "dur": 1.251, + "args": { + "External id": 502517,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843671.239, "dur": 16.782, + "args": { + "External id": 502518,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843695.865, "dur": 133.406, + "args": { + "External id": 502519,"Record function id": 0, "Sequence number": 5516206, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843696.922, "dur": 129.259, + "args": { + "External id": 502520,"Sequence number": 5516206, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 2338711, "tid": 2379440, "ts": 6274141843696.922, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141843698.469, "dur": 127.334, + "args": { + "External id": 502521,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141843702.327, "dur": 15.131, + "args": { + "External id": 502522,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141843703.405, "dur": 2.180, + "args": { + "External id": 502523,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843706.260, "dur": 10.947, + "args": { + "External id": 502524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843707.183, "dur": 9.696, + "args": { + "External id": 502525,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141843718.664, "dur": 4.786, + "args": { + "External id": 502526,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141843720.116, "dur": 3.108, + "args": { + "External id": 502527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843726.871, "dur": 97.894, + "args": { + "External id": 502528,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843833.854, "dur": 110.929, + "args": { + "External id": 502529,"Record function id": 0, "Sequence number": 5516205, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141843834.850, "dur": 87.929, + "args": { + "External id": 502530,"Sequence number": 5516205, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 2338711, "tid": 2379440, "ts": 6274141843834.850, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141843836.610, "dur": 85.684, + "args": { + "External id": 502531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274141843838.087, "dur": 25.422, + "args": { + "External id": 502532,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141843842.165, "dur": 2.311, + "args": { + "External id": 502533,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843849.570, "dur": 13.688, + "args": { + "External id": 502534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843852.113, "dur": 10.696, + "args": { + "External id": 502535,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141843866.409, "dur": 2.614, + "args": { + "External id": 502536,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141843868.194, "dur": 0.668, + "args": { + "External id": 502537,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843869.711, "dur": 51.934, + "args": { + "External id": 502538,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141843927.056, "dur": 15.690, + "args": { + "External id": 502539,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141843950.690, "dur": 392.272, + "args": { + "External id": 502540,"Record function id": 0, "Sequence number": 5516204, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141843952.188, "dur": 380.412, + "args": { + "External id": 502541,"Sequence number": 5516204, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 2338711, "tid": 2379440, "ts": 6274141843952.188, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274141844159.766, "dur": 47.634, + "args": { + "External id": 502542,"kernel_hash": "cukbbbqr5h34iuuctj6lctslc3gc7fmn2y2xm57dvknbmyb4o2vg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukbbbqr5h34iuuctj6lctslc3gc7fmn2y2xm57dvknbmyb4o2vg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338711, "tid": 2379440, + "ts": 6274141844247.076, "dur": 24.145, + "args": { + "External id": 502543,"kernel_hash": "cjqdkew4nkqxnqrjfb6bdwoluwj7gci6uynymszp5r5s3yejimwu", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/jq/cjqdkew4nkqxnqrjfb6bdwoluwj7gci6uynymszp5r5s3yejimwu.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338711, "tid": 2379440, + "ts": 6274141844288.784, "dur": 18.752, + "args": { + "External id": 502544,"kernel_hash": "ccvsds3iqc22g7vjoxnqkxggpevktgq2apwlte6cs2nh2wkycybr", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/cv/ccvsds3iqc22g7vjoxnqkxggpevktgq2apwlte6cs2nh2wkycybr.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141844353.867, "dur": 14.279, + "args": { + "External id": 502545,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141844356.382, "dur": 10.797, + "args": { + "External id": 502546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141844359.702, "dur": 6.601, + "args": { + "External id": 502547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141844362.816, "dur": 3.326, + "args": { + "External id": 502548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141844372.050, "dur": 36.494, + "args": { + "External id": 502549,"Record function id": 0, "Sequence number": 5516203, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141844373.027, "dur": 27.915, + "args": { + "External id": 502550,"Sequence number": 5516203, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 2338711, "tid": 2379440, "ts": 6274141844373.027, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141844375.088, "dur": 8.844, + "args": { + "External id": 502551,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844380.633, "dur": 1.309, + "args": { + "External id": 502552,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141844385.123, "dur": 7.439, + "args": { + "External id": 502553,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844391.212, "dur": 0.554, + "args": { + "External id": 502554,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141844393.141, "dur": 2.354, + "args": { + "External id": 502555,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844394.426, "dur": 0.353, + "args": { + "External id": 502556,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274141844396.206, "dur": 4.084, + "args": { + "External id": 502557,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844399.080, "dur": 0.570, + "args": { + "External id": 502558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141844412.013, "dur": 5.799, + "args": { + "External id": 502559,"Record function id": 0, "Sequence number": 5516202, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141844412.749, "dur": 1.290, + "args": { + "External id": 502560,"Sequence number": 5516202, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 2338711, "tid": 2379440, "ts": 6274141844412.749, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141844422.039, "dur": 563.702, + "args": { + "External id": 502561,"Record function id": 0, "Sequence number": 5516201, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141844423.048, "dur": 547.528, + "args": { + "External id": 502562,"Sequence number": 5516201, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 2338711, "tid": 2379440, "ts": 6274141844423.048, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141844461.181, "dur": 12.528, + "args": { + "External id": 502563,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141844469.350, "dur": 3.946, + "args": { + "External id": 502564,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141844477.925, "dur": 5.923, + "args": { + "External id": 502565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141844480.520, "dur": 2.502, + "args": { + "External id": 502566,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844482.104, "dur": 0.664, + "args": { + "External id": 502567,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274141844489.943, "dur": 158.099, + "args": { + "External id": 502568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141844491.377, "dur": 8.139, + "args": { + "External id": 502569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141844492.250, "dur": 6.656, + "args": { + "External id": 502570,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844495.919, "dur": 2.915, + "args": { + "External id": 502571,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274141844501.283, "dur": 145.698, + "args": { + "External id": 502572,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141844503.196, "dur": 142.084, + "args": { + "External id": 502573,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141844657.108, "dur": 8.924, + "args": { + "External id": 502574,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141844662.426, "dur": 3.425, + "args": { + "External id": 502575,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141844702.410, "dur": 7.234, + "args": { + "External id": 502576,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141844711.022, "dur": 1.976, + "args": { + "External id": 502577,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141844716.473, "dur": 1.558, + "args": { + "External id": 502578,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141844754.980, "dur": 2.894, + "args": { + "External id": 502579,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141844756.190, "dur": 1.486, + "args": { + "External id": 502580,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274141844786.747, "dur": 158.672, + "args": { + "External id": 502581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141844794.193, "dur": 10.246, + "args": { + "External id": 502582,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844800.576, "dur": 1.092, + "args": { + "External id": 502583,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141844806.532, "dur": 8.142, + "args": { + "External id": 502584,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844810.871, "dur": 3.061, + "args": { + "External id": 502585,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141844816.204, "dur": 4.723, + "args": { + "External id": 502586,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844820.152, "dur": 0.425, + "args": { + "External id": 502587,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141844821.395, "dur": 4.972, + "args": { + "External id": 502588,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844825.366, "dur": 0.499, + "args": { + "External id": 502589,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141844830.083, "dur": 4.222, + "args": { + "External id": 502590,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844833.367, "dur": 0.619, + "args": { + "External id": 502591,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141844837.518, "dur": 8.408, + "args": { + "External id": 502592,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141844843.488, "dur": 2.238, + "args": { + "External id": 502593,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141844846.531, "dur": 3.805, + "args": { + "External id": 502594,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844849.582, "dur": 0.448, + "args": { + "External id": 502595,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141844851.017, "dur": 1.929, + "args": { + "External id": 502596,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141844851.852, "dur": 1.006, + "args": { + "External id": 502597,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274141844854.791, "dur": 73.466, + "args": { + "External id": 502598,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141844930.408, "dur": 3.492, + "args": { + "External id": 502599,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141844938.200, "dur": 2.933, + "args": { + "External id": 502600,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141844939.761, "dur": 0.796, + "args": { + "External id": 502601,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141844942.912, "dur": 0.979, + "args": { + "External id": 502602,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141844999.320, "dur": 11.443, + "args": { + "External id": 502603,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141845001.827, "dur": 8.054, + "args": { + "External id": 502604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845004.733, "dur": 3.994, + "args": { + "External id": 502605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845006.310, "dur": 2.266, + "args": { + "External id": 502606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845014.999, "dur": 9.389, + "args": { + "External id": 502607,"Record function id": 0, "Sequence number": 5516200, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845015.872, "dur": 6.033, + "args": { + "External id": 502608,"Sequence number": 5516200, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 2338711, "tid": 2379440, "ts": 6274141845015.872, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845019.875, "dur": 1.834, + "args": { + "External id": 502609,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845020.756, "dur": 0.788, + "args": { + "External id": 502610,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845028.371, "dur": 186.852, + "args": { + "External id": 502611,"Record function id": 0, "Sequence number": 5516199, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845029.294, "dur": 176.410, + "args": { + "External id": 502612,"Sequence number": 5516199, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 2338711, "tid": 2379440, "ts": 6274141845029.294, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845036.745, "dur": 4.923, + "args": { + "External id": 502613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845038.861, "dur": 2.223, + "args": { + "External id": 502614,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845040.250, "dur": 0.633, + "args": { + "External id": 502615,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141845043.338, "dur": 93.979, + "args": { + "External id": 502616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845140.148, "dur": 7.862, + "args": { + "External id": 502617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845141.140, "dur": 5.776, + "args": { + "External id": 502618,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845145.631, "dur": 1.110, + "args": { + "External id": 502619,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845150.015, "dur": 4.783, + "args": { + "External id": 502620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845151.025, "dur": 3.312, + "args": { + "External id": 502621,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845153.864, "dur": 0.371, + "args": { + "External id": 502622,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141845155.671, "dur": 49.144, + "args": { + "External id": 502623,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845222.393, "dur": 10.294, + "args": { + "External id": 502624,"Record function id": 0, "Sequence number": 5516198, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845223.693, "dur": 7.116, + "args": { + "External id": 502625,"Sequence number": 5516198, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 2338711, "tid": 2379440, "ts": 6274141845223.693, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845225.416, "dur": 5.246, + "args": { + "External id": 502626,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845226.247, "dur": 4.273, + "args": { + "External id": 502627,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845236.129, "dur": 12.393, + "args": { + "External id": 502628,"Record function id": 0, "Sequence number": 5516197, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845239.968, "dur": 6.313, + "args": { + "External id": 502629,"Sequence number": 5516197, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 2338711, "tid": 2379440, "ts": 6274141845239.968, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845240.835, "dur": 5.245, + "args": { + "External id": 502630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845241.833, "dur": 3.735, + "args": { + "External id": 502631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845245.087, "dur": 0.368, + "args": { + "External id": 502632,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141845252.595, "dur": 5.876, + "args": { + "External id": 502633,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141845253.694, "dur": 4.248, + "args": { + "External id": 502634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845255.095, "dur": 2.532, + "args": { + "External id": 502635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845255.936, "dur": 1.560, + "args": { + "External id": 502636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845263.569, "dur": 10.513, + "args": { + "External id": 502637,"Record function id": 0, "Sequence number": 5516196, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845264.343, "dur": 6.297, + "args": { + "External id": 502638,"Sequence number": 5516196, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 2338711, "tid": 2379440, "ts": 6274141845264.343, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845265.176, "dur": 5.328, + "args": { + "External id": 502639,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845269.247, "dur": 1.122, + "args": { + "External id": 502640,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845277.509, "dur": 122.579, + "args": { + "External id": 502641,"Record function id": 0, "Sequence number": 5516195, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845278.290, "dur": 113.530, + "args": { + "External id": 502642,"Sequence number": 5516195, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 2338711, "tid": 2379440, "ts": 6274141845278.290, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845280.742, "dur": 2.682, + "args": { + "External id": 502643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845281.643, "dur": 1.361, + "args": { + "External id": 502644,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845282.357, "dur": 0.542, + "args": { + "External id": 502645,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141845286.551, "dur": 47.119, + "args": { + "External id": 502646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845334.770, "dur": 6.097, + "args": { + "External id": 502647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845335.688, "dur": 4.615, + "args": { + "External id": 502648,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845339.091, "dur": 1.085, + "args": { + "External id": 502649,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845341.978, "dur": 5.359, + "args": { + "External id": 502650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845342.949, "dur": 4.016, + "args": { + "External id": 502651,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845346.511, "dur": 0.379, + "args": { + "External id": 502652,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141845348.003, "dur": 42.944, + "args": { + "External id": 502653,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845404.739, "dur": 38.682, + "args": { + "External id": 502654,"Record function id": 0, "Sequence number": 5516194, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845405.864, "dur": 6.637, + "args": { + "External id": 502655,"Sequence number": 5516194, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 2338711, "tid": 2379440, "ts": 6274141845405.864, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845407.009, "dur": 5.351, + "args": { + "External id": 502656,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845411.258, "dur": 1.009, + "args": { + "External id": 502657,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141845416.211, "dur": 24.433, + "args": { + "External id": 502658,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845447.153, "dur": 10.644, + "args": { + "External id": 502659,"Record function id": 0, "Sequence number": 5516193, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845448.318, "dur": 7.121, + "args": { + "External id": 502660,"Sequence number": 5516193, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 2338711, "tid": 2379440, "ts": 6274141845448.318, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141845451.866, "dur": 3.354, + "args": { + "External id": 502661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141845452.877, "dur": 1.879, + "args": { + "External id": 502662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845453.828, "dur": 0.778, + "args": { + "External id": 502663,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141845461.967, "dur": 5.296, + "args": { + "External id": 502664,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141845463.331, "dur": 3.449, + "args": { + "External id": 502665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845464.293, "dur": 2.194, + "args": { + "External id": 502666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845464.888, "dur": 1.503, + "args": { + "External id": 502667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141845471.627, "dur": 498.218, + "args": { + "External id": 502668,"Record function id": 0, "Sequence number": 5516192, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141845473.020, "dur": 481.665, + "args": { + "External id": 502669,"Sequence number": 5516192, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 2338711, "tid": 2379440, "ts": 6274141845473.020, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274141845504.242, "dur": 36.091, + "args": { + "External id": 502670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141845506.061, "dur": 34.088, + "args": { + "External id": 502671,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141845509.234, "dur": 6.076, + "args": { + "External id": 502672,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141845511.687, "dur": 3.109, + "args": { + "External id": 502673,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141845516.653, "dur": 22.990, + "args": { + "External id": 502674,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845555.404, "dur": 2.127, + "args": { + "External id": 502675,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845556.356, "dur": 1.075, + "args": { + "External id": 502676,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845561.881, "dur": 1.646, + "args": { + "External id": 502677,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845562.516, "dur": 0.915, + "args": { + "External id": 502678,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141845581.458, "dur": 24.547, + "args": { + "External id": 502679,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141845624.585, "dur": 4.149, + "args": { + "External id": 502680,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845826.414, "dur": 4.513, + "args": { + "External id": 502681,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141845835.901, "dur": 42.117, + "args": { + "External id": 502682,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845850.521, "dur": 1.406, + "args": { + "External id": 502683,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141845884.569, "dur": 34.084, + "args": { + "External id": 502684,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141845886.801, "dur": 31.646, + "args": { + "External id": 502685,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141845891.319, "dur": 4.492, + "args": { + "External id": 502686,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141845899.890, "dur": 17.907, + "args": { + "External id": 502687,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141845923.574, "dur": 2.498, + "args": { + "External id": 502688,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845924.865, "dur": 1.047, + "args": { + "External id": 502689,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845933.054, "dur": 5.554, + "args": { + "External id": 502690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845936.880, "dur": 1.572, + "args": { + "External id": 502691,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141845940.818, "dur": 2.053, + "args": { + "External id": 502692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141845941.791, "dur": 0.988, + "args": { + "External id": 502693,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141845985.392, "dur": 10.557, + "args": { + "External id": 502694,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141845988.108, "dur": 7.050, + "args": { + "External id": 502695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845990.695, "dur": 3.473, + "args": { + "External id": 502696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141845992.083, "dur": 1.972, + "args": { + "External id": 502697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141845999.883, "dur": 11.019, + "args": { + "External id": 502698,"Record function id": 0, "Sequence number": 5516191, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846000.916, "dur": 6.687, + "args": { + "External id": 502699,"Sequence number": 5516191, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 2338711, "tid": 2379440, "ts": 6274141846000.916, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141846005.067, "dur": 2.306, + "args": { + "External id": 502700,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141846006.023, "dur": 1.223, + "args": { + "External id": 502701,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846017.228, "dur": 226.215, + "args": { + "External id": 502702,"Record function id": 0, "Sequence number": 5516190, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846018.663, "dur": 218.367, + "args": { + "External id": 502703,"Sequence number": 5516190, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 2338711, "tid": 2379440, "ts": 6274141846018.663, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141846022.363, "dur": 5.239, + "args": { + "External id": 502704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141846024.168, "dur": 2.788, + "args": { + "External id": 502705,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846025.618, "dur": 1.102, + "args": { + "External id": 502706,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141846028.945, "dur": 131.623, + "args": { + "External id": 502707,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141846164.143, "dur": 10.893, + "args": { + "External id": 502708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141846165.949, "dur": 7.952, + "args": { + "External id": 502709,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846169.788, "dur": 3.933, + "args": { + "External id": 502710,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141846177.425, "dur": 5.900, + "args": { + "External id": 502711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141846178.809, "dur": 4.068, + "args": { + "External id": 502712,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846182.356, "dur": 0.449, + "args": { + "External id": 502713,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141846184.192, "dur": 51.931, + "args": { + "External id": 502714,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846250.556, "dur": 10.507, + "args": { + "External id": 502715,"Record function id": 0, "Sequence number": 5516189, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846251.701, "dur": 7.081, + "args": { + "External id": 502716,"Sequence number": 5516189, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 2338711, "tid": 2379440, "ts": 6274141846251.701, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141846253.328, "dur": 5.296, + "args": { + "External id": 502717,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141846257.015, "dur": 1.494, + "args": { + "External id": 502718,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846264.914, "dur": 9.723, + "args": { + "External id": 502719,"Record function id": 0, "Sequence number": 5516188, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846265.737, "dur": 7.348, + "args": { + "External id": 502720,"Sequence number": 5516188, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 2338711, "tid": 2379440, "ts": 6274141846265.737, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141846266.903, "dur": 5.973, + "args": { + "External id": 502721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141846267.657, "dur": 4.754, + "args": { + "External id": 502722,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846271.510, "dur": 0.762, + "args": { + "External id": 502723,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141846279.114, "dur": 6.697, + "args": { + "External id": 502724,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141846280.527, "dur": 4.778, + "args": { + "External id": 502725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141846282.121, "dur": 2.864, + "args": { + "External id": 502726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141846283.336, "dur": 1.528, + "args": { + "External id": 502727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846288.986, "dur": 9.232, + "args": { + "External id": 502728,"Record function id": 0, "Sequence number": 5516187, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141846292.561, "dur": 3.889, + "args": { + "External id": 502729,"Sequence number": 5516187, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 2338711, "tid": 2379440, "ts": 6274141846292.561, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141846294.074, "dur": 2.234, + "args": { + "External id": 502730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141846294.998, "dur": 1.201, + "args": { + "External id": 502731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141846305.074, "dur": 448.880, + "args": { + "External id": 502732,"Record function id": 0, "Sequence number": 5516186, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141846306.416, "dur": 426.518, + "args": { + "External id": 502733,"Sequence number": 5516186, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 2338711, "tid": 2379440, "ts": 6274141846306.416, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141846326.202, "dur": 12.402, + "args": { + "External id": 502734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846333.116, "dur": 4.876, + "args": { + "External id": 502735,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141846340.772, "dur": 5.272, + "args": { + "External id": 502736,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846341.661, "dur": 4.129, + "args": { + "External id": 502737,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141846347.622, "dur": 6.962, + "args": { + "External id": 502738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846351.272, "dur": 3.072, + "args": { + "External id": 502739,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141846388.152, "dur": 314.109, + "args": { + "External id": 502740,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141846474.638, "dur": 3.000, + "args": { + "External id": 502741,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141846481.862, "dur": 2.104, + "args": { + "External id": 502742,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141846486.939, "dur": 1.814, + "args": { + "External id": 502743,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141846489.479, "dur": 3.174, + "args": { + "External id": 502744,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141846553.014, "dur": 2.455, + "args": { + "External id": 502745,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141846553.936, "dur": 1.412, + "args": { + "External id": 502746,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141846556.966, "dur": 51.220, + "args": { + "External id": 502747,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846561.591, "dur": 2.974, + "args": { + "External id": 502748,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141846611.420, "dur": 2.770, + "args": { + "External id": 502749,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141846612.725, "dur": 1.295, + "args": { + "External id": 502750,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141846620.010, "dur": 27.291, + "args": { + "External id": 502751,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846625.262, "dur": 3.421, + "args": { + "External id": 502752,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141846718.742, "dur": 4.317, + "args": { + "External id": 502753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141846725.858, "dur": 0.927, + "args": { + "External id": 502754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141846728.810, "dur": 1.015, + "args": { + "External id": 502755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141846766.025, "dur": 250.485, + "args": { + "External id": 502756,"Record function id": 0, "Sequence number": 5516185, "Fwd thread id": 1, "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141846768.130, "dur": 241.443, + "args": { + "External id": 502757,"Sequence number": 5516185, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 484 + } + }, + { + "ph": "f", "id": 49, "pid": 2338711, "tid": 2379440, "ts": 6274141846768.130, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141846792.233, "dur": 51.082, + "args": { + "External id": 502758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846796.388, "dur": 4.722, + "args": { + "External id": 502759,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141846802.928, "dur": 39.618, + "args": { + "External id": 502760,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141846854.590, "dur": 7.363, + "args": { + "External id": 502761,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141846858.757, "dur": 2.835, + "args": { + "External id": 502762,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141847023.602, "dur": 230.783, + "args": { + "External id": 502763,"Record function id": 0, "Sequence number": 5516184, "Fwd thread id": 1, "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141847025.470, "dur": 220.190, + "args": { + "External id": 502764,"Sequence number": 5516184, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 491 + } + }, + { + "ph": "f", "id": 50, "pid": 2338711, "tid": 2379440, "ts": 6274141847025.470, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141847037.226, "dur": 88.511, + "args": { + "External id": 502765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847040.063, "dur": 3.177, + "args": { + "External id": 502766,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141847044.372, "dur": 80.369, + "args": { + "External id": 502767,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141847135.854, "dur": 10.957, + "args": { + "External id": 502768,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847142.275, "dur": 4.169, + "args": { + "External id": 502769,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847263.039, "dur": 15.925, + "args": { + "External id": 502770,"Record function id": 0, "Sequence number": 5516183, "Fwd thread id": 1, "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847266.908, "dur": 9.377, + "args": { + "External id": 502771,"Sequence number": 5516183, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 498 + } + }, + { + "ph": "f", "id": 51, "pid": 2338711, "tid": 2379440, "ts": 6274141847266.908, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847269.332, "dur": 6.694, + "args": { + "External id": 502772,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847270.941, "dur": 4.877, + "args": { + "External id": 502773,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847282.527, "dur": 11.771, + "args": { + "External id": 502774,"Record function id": 0, "Sequence number": 5516182, "Fwd thread id": 1, "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847283.547, "dur": 8.665, + "args": { + "External id": 502775,"Sequence number": 5516182, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 502 + } + }, + { + "ph": "f", "id": 52, "pid": 2338711, "tid": 2379440, "ts": 6274141847283.547, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847284.555, "dur": 7.494, + "args": { + "External id": 502776,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847288.481, "dur": 3.432, + "args": { + "External id": 502777,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847297.300, "dur": 8.099, + "args": { + "External id": 502778,"Record function id": 0, "Sequence number": 5516181, "Fwd thread id": 1, "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847298.210, "dur": 5.214, + "args": { + "External id": 502779,"Sequence number": 5516181, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 506 + } + }, + { + "ph": "f", "id": 53, "pid": 2338711, "tid": 2379440, "ts": 6274141847298.210, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847299.249, "dur": 4.036, + "args": { + "External id": 502780,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847302.549, "dur": 0.616, + "args": { + "External id": 502781,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847308.899, "dur": 39.255, + "args": { + "External id": 502782,"Record function id": 0, "Sequence number": 5516180, "Fwd thread id": 1, "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847309.995, "dur": 35.355, + "args": { + "External id": 502783,"Sequence number": 5516180, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 510 + } + }, + { + "ph": "f", "id": 54, "pid": 2338711, "tid": 2379440, "ts": 6274141847309.995, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847313.565, "dur": 31.613, + "args": { + "External id": 502784,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847344.142, "dur": 0.924, + "args": { + "External id": 502785,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847351.598, "dur": 172.011, + "args": { + "External id": 502786,"Record function id": 0, "Sequence number": 5516179, "Fwd thread id": 1, "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847352.358, "dur": 163.501, + "args": { + "External id": 502787,"Sequence number": 5516179, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 514 + } + }, + { + "ph": "f", "id": 55, "pid": 2338711, "tid": 2379440, "ts": 6274141847352.358, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847358.892, "dur": 7.078, + "args": { + "External id": 502788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847361.059, "dur": 4.320, + "args": { + "External id": 502789,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847363.229, "dur": 1.853, + "args": { + "External id": 502790,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141847367.460, "dur": 81.246, + "args": { + "External id": 502791,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847450.193, "dur": 7.987, + "args": { + "External id": 502792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847450.871, "dur": 6.492, + "args": { + "External id": 502793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847454.806, "dur": 2.384, + "args": { + "External id": 502794,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847459.901, "dur": 6.539, + "args": { + "External id": 502795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847460.849, "dur": 5.157, + "args": { + "External id": 502796,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847465.393, "dur": 0.548, + "args": { + "External id": 502797,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141847466.941, "dur": 48.099, + "args": { + "External id": 502798,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847528.751, "dur": 11.309, + "args": { + "External id": 502799,"Record function id": 0, "Sequence number": 5516178, "Fwd thread id": 1, "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847529.881, "dur": 8.566, + "args": { + "External id": 502800,"Sequence number": 5516178, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 527 + } + }, + { + "ph": "f", "id": 56, "pid": 2338711, "tid": 2379440, "ts": 6274141847529.881, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847531.202, "dur": 7.105, + "args": { + "External id": 502801,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847534.075, "dur": 4.098, + "args": { + "External id": 502802,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847543.386, "dur": 8.492, + "args": { + "External id": 502803,"Record function id": 0, "Sequence number": 5516177, "Fwd thread id": 1, "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847544.205, "dur": 6.078, + "args": { + "External id": 502804,"Sequence number": 5516177, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 531 + } + }, + { + "ph": "f", "id": 57, "pid": 2338711, "tid": 2379440, "ts": 6274141847544.205, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847544.938, "dur": 5.136, + "args": { + "External id": 502805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847545.820, "dur": 3.770, + "args": { + "External id": 502806,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847548.958, "dur": 0.547, + "args": { + "External id": 502807,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141847557.891, "dur": 12.665, + "args": { + "External id": 502808,"Record function id": 0, "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141847559.618, "dur": 10.100, + "args": { + "External id": 502809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141847562.243, "dur": 7.064, + "args": { + "External id": 502810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141847566.466, "dur": 2.689, + "args": { + "External id": 502811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847576.761, "dur": 5.193, + "args": { + "External id": 502812,"Record function id": 0, "Sequence number": 5516176, "Fwd thread id": 1, "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847577.507, "dur": 2.395, + "args": { + "External id": 502813,"Sequence number": 5516176, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 540 + } + }, + { + "ph": "f", "id": 58, "pid": 2338711, "tid": 2379440, "ts": 6274141847577.507, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847578.557, "dur": 1.192, + "args": { + "External id": 502814,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847578.942, "dur": 0.701, + "args": { + "External id": 502815,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847609.362, "dur": 122.458, + "args": { + "External id": 502816,"Record function id": 0, "Sequence number": 5516175, "Fwd thread id": 1, "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847611.977, "dur": 111.592, + "args": { + "External id": 502817,"Sequence number": 5516175, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 544 + } + }, + { + "ph": "f", "id": 59, "pid": 2338711, "tid": 2379440, "ts": 6274141847611.977, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847615.868, "dur": 7.205, + "args": { + "External id": 502818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847616.978, "dur": 5.304, + "args": { + "External id": 502819,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847621.022, "dur": 0.929, + "args": { + "External id": 502820,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141847624.214, "dur": 42.207, + "args": { + "External id": 502821,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847668.012, "dur": 6.490, + "args": { + "External id": 502822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847668.651, "dur": 5.156, + "args": { + "External id": 502823,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847672.459, "dur": 1.256, + "args": { + "External id": 502824,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847675.878, "dur": 4.781, + "args": { + "External id": 502825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847678.976, "dur": 1.293, + "args": { + "External id": 502826,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847679.747, "dur": 0.435, + "args": { + "External id": 502827,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141847681.046, "dur": 41.874, + "args": { + "External id": 502828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847737.649, "dur": 39.736, + "args": { + "External id": 502829,"Record function id": 0, "Sequence number": 5516174, "Fwd thread id": 1, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847738.559, "dur": 6.360, + "args": { + "External id": 502830,"Sequence number": 5516174, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 557 + } + }, + { + "ph": "f", "id": 60, "pid": 2338711, "tid": 2379440, "ts": 6274141847738.559, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847739.962, "dur": 4.816, + "args": { + "External id": 502831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847743.415, "dur": 1.230, + "args": { + "External id": 502832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141847751.977, "dur": 22.703, + "args": { + "External id": 502833,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847781.336, "dur": 9.967, + "args": { + "External id": 502834,"Record function id": 0, "Sequence number": 5516173, "Fwd thread id": 1, "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847784.687, "dur": 4.389, + "args": { + "External id": 502835,"Sequence number": 5516173, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 562 + } + }, + { + "ph": "f", "id": 61, "pid": 2338711, "tid": 2379440, "ts": 6274141847784.687, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847785.259, "dur": 3.600, + "args": { + "External id": 502836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847786.205, "dur": 2.190, + "args": { + "External id": 502837,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847787.670, "dur": 0.619, + "args": { + "External id": 502838,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141847795.877, "dur": 6.492, + "args": { + "External id": 502839,"Record function id": 0, "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141847797.364, "dur": 4.479, + "args": { + "External id": 502840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141847798.921, "dur": 2.562, + "args": { + "External id": 502841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141847799.676, "dur": 1.689, + "args": { + "External id": 502842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847805.443, "dur": 13.531, + "args": { + "External id": 502843,"Record function id": 0, "Sequence number": 5516172, "Fwd thread id": 1, "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847809.550, "dur": 7.319, + "args": { + "External id": 502844,"Sequence number": 5516172, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 571 + } + }, + { + "ph": "f", "id": 62, "pid": 2338711, "tid": 2379440, "ts": 6274141847809.550, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847812.983, "dur": 3.755, + "args": { + "External id": 502845,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847815.800, "dur": 0.832, + "args": { + "External id": 502846,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847821.980, "dur": 106.927, + "args": { + "External id": 502847,"Record function id": 0, "Sequence number": 5516171, "Fwd thread id": 1, "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847822.887, "dur": 98.407, + "args": { + "External id": 502848,"Sequence number": 5516171, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 575 + } + }, + { + "ph": "f", "id": 63, "pid": 2338711, "tid": 2379440, "ts": 6274141847822.887, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847824.822, "dur": 2.262, + "args": { + "External id": 502849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847825.313, "dur": 1.365, + "args": { + "External id": 502850,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847826.119, "dur": 0.470, + "args": { + "External id": 502851,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141847833.053, "dur": 36.362, + "args": { + "External id": 502852,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847870.210, "dur": 7.240, + "args": { + "External id": 502853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847870.875, "dur": 5.965, + "args": { + "External id": 502854,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847871.923, "dur": 4.781, + "args": { + "External id": 502855,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847878.537, "dur": 4.866, + "args": { + "External id": 502856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847879.376, "dur": 3.524, + "args": { + "External id": 502857,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847882.340, "dur": 0.490, + "args": { + "External id": 502858,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141847886.210, "dur": 34.552, + "args": { + "External id": 502859,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847933.194, "dur": 23.688, + "args": { + "External id": 502860,"Record function id": 0, "Sequence number": 5516170, "Fwd thread id": 1, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847934.263, "dur": 3.234, + "args": { + "External id": 502861,"Sequence number": 5516170, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 588 + } + }, + { + "ph": "f", "id": 64, "pid": 2338711, "tid": 2379440, "ts": 6274141847934.263, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141847935.623, "dur": 1.739, + "args": { + "External id": 502862,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141847936.221, "dur": 1.018, + "args": { + "External id": 502863,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141847939.944, "dur": 14.781, + "args": { + "External id": 502864,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847960.564, "dur": 11.890, + "args": { + "External id": 502865,"Record function id": 0, "Sequence number": 5516169, "Fwd thread id": 1, "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141847961.527, "dur": 9.315, + "args": { + "External id": 502866,"Sequence number": 5516169, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 593 + } + }, + { + "ph": "f", "id": 65, "pid": 2338711, "tid": 2379440, "ts": 6274141847961.527, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141847962.437, "dur": 8.182, + "args": { + "External id": 502867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141847963.560, "dur": 6.562, + "args": { + "External id": 502868,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141847969.529, "dur": 0.437, + "args": { + "External id": 502869,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141847976.578, "dur": 5.445, + "args": { + "External id": 502870,"Record function id": 0, "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141847978.135, "dur": 3.436, + "args": { + "External id": 502871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141847979.116, "dur": 2.131, + "args": { + "External id": 502872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141847979.711, "dur": 1.435, + "args": { + "External id": 502873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141847986.137, "dur": 437.409, + "args": { + "External id": 502874,"Record function id": 0, "Sequence number": 5516168, "Fwd thread id": 1, "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141847987.487, "dur": 400.657, + "args": { + "External id": 502875,"Sequence number": 5516168, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 602 + } + }, + { + "ph": "f", "id": 66, "pid": 2338711, "tid": 2379440, "ts": 6274141847987.487, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141848026.596, "dur": 1.844, + "args": { + "External id": 502876,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848027.258, "dur": 1.035, + "args": { + "External id": 502877,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141848047.211, "dur": 4.241, + "args": { + "External id": 502878,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141848101.107, "dur": 5.744, + "args": { + "External id": 502879,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848278.531, "dur": 3.046, + "args": { + "External id": 502880,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141848286.078, "dur": 36.354, + "args": { + "External id": 502881,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848296.219, "dur": 1.273, + "args": { + "External id": 502882,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141848328.260, "dur": 35.294, + "args": { + "External id": 502883,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141848330.596, "dur": 32.766, + "args": { + "External id": 502884,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848337.416, "dur": 5.585, + "args": { + "External id": 502885,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141848345.019, "dur": 17.831, + "args": { + "External id": 502886,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141848370.526, "dur": 2.044, + "args": { + "External id": 502887,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848371.504, "dur": 0.928, + "args": { + "External id": 502888,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141848378.917, "dur": 2.062, + "args": { + "External id": 502889,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848379.563, "dur": 1.316, + "args": { + "External id": 502890,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141848399.134, "dur": 17.761, + "args": { + "External id": 502891,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141848436.998, "dur": 11.543, + "args": { + "External id": 502892,"Record function id": 0, "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141848441.141, "dur": 6.747, + "args": { + "External id": 502893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141848443.827, "dur": 3.188, + "args": { + "External id": 502894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141848444.717, "dur": 2.197, + "args": { + "External id": 502895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141848454.559, "dur": 6.807, + "args": { + "External id": 502896,"Record function id": 0, "Sequence number": 5516167, "Fwd thread id": 1, "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141848455.732, "dur": 1.591, + "args": { + "External id": 502897,"Sequence number": 5516167, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 624 + } + }, + { + "ph": "f", "id": 67, "pid": 2338711, "tid": 2379440, "ts": 6274141848455.732, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141848465.575, "dur": 490.164, + "args": { + "External id": 502898,"Record function id": 0, "Sequence number": 5516166, "Fwd thread id": 1, "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141848466.922, "dur": 475.568, + "args": { + "External id": 502899,"Sequence number": 5516166, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 626 + } + }, + { + "ph": "f", "id": 68, "pid": 2338711, "tid": 2379440, "ts": 6274141848466.922, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141848500.001, "dur": 8.701, + "args": { + "External id": 502900,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141848504.909, "dur": 3.480, + "args": { + "External id": 502901,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141848511.651, "dur": 7.489, + "args": { + "External id": 502902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141848513.348, "dur": 5.128, + "args": { + "External id": 502903,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848517.399, "dur": 0.896, + "args": { + "External id": 502904,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274141848525.912, "dur": 119.567, + "args": { + "External id": 502905,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141848526.815, "dur": 4.686, + "args": { + "External id": 502906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141848527.275, "dur": 3.822, + "args": { + "External id": 502907,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848527.980, "dur": 3.010, + "args": { + "External id": 502908,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274141848532.568, "dur": 112.240, + "args": { + "External id": 502909,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141848534.008, "dur": 109.298, + "args": { + "External id": 502910,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141848653.269, "dur": 6.419, + "args": { + "External id": 502911,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848657.421, "dur": 2.037, + "args": { + "External id": 502912,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141848695.813, "dur": 6.983, + "args": { + "External id": 502913,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141848705.802, "dur": 1.634, + "args": { + "External id": 502914,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141848708.041, "dur": 1.476, + "args": { + "External id": 502915,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141848745.023, "dur": 2.092, + "args": { + "External id": 502916,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848745.706, "dur": 1.222, + "args": { + "External id": 502917,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274141848771.737, "dur": 147.750, + "args": { + "External id": 502918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141848777.573, "dur": 9.304, + "args": { + "External id": 502919,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848783.535, "dur": 2.347, + "args": { + "External id": 502920,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141848788.227, "dur": 9.219, + "args": { + "External id": 502921,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848793.019, "dur": 3.607, + "args": { + "External id": 502922,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141848798.926, "dur": 2.249, + "args": { + "External id": 502923,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848800.141, "dur": 0.656, + "args": { + "External id": 502924,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141848804.353, "dur": 1.657, + "args": { + "External id": 502925,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848805.046, "dur": 0.656, + "args": { + "External id": 502926,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141848812.659, "dur": 1.889, + "args": { + "External id": 502927,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848813.695, "dur": 0.582, + "args": { + "External id": 502928,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141848817.132, "dur": 7.831, + "args": { + "External id": 502929,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141848822.804, "dur": 2.012, + "args": { + "External id": 502930,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141848825.695, "dur": 3.427, + "args": { + "External id": 502931,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848828.531, "dur": 0.326, + "args": { + "External id": 502932,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141848829.644, "dur": 3.767, + "args": { + "External id": 502933,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848830.877, "dur": 2.427, + "args": { + "External id": 502934,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274141848835.132, "dur": 66.592, + "args": { + "External id": 502935,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848906.455, "dur": 3.766, + "args": { + "External id": 502936,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141848910.918, "dur": 4.325, + "args": { + "External id": 502937,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141848914.464, "dur": 0.414, + "args": { + "External id": 502938,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848917.573, "dur": 0.831, + "args": { + "External id": 502939,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141848968.797, "dur": 10.495, + "args": { + "External id": 502940,"Record function id": 0, "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141848971.307, "dur": 6.981, + "args": { + "External id": 502941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141848973.827, "dur": 3.621, + "args": { + "External id": 502942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141848975.032, "dur": 2.300, + "args": { + "External id": 502943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141848983.242, "dur": 7.083, + "args": { + "External id": 502944,"Record function id": 0, "Sequence number": 5516165, "Fwd thread id": 1, "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141848984.180, "dur": 3.466, + "args": { + "External id": 502945,"Sequence number": 5516165, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 672 + } + }, + { + "ph": "f", "id": 69, "pid": 2338711, "tid": 2379440, "ts": 6274141848984.180, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141848985.842, "dur": 1.578, + "args": { + "External id": 502946,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141848986.606, "dur": 0.704, + "args": { + "External id": 502947,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141848993.839, "dur": 177.238, + "args": { + "External id": 502948,"Record function id": 0, "Sequence number": 5516164, "Fwd thread id": 1, "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141848997.979, "dur": 164.457, + "args": { + "External id": 502949,"Sequence number": 5516164, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 676 + } + }, + { + "ph": "f", "id": 70, "pid": 2338711, "tid": 2379440, "ts": 6274141848997.979, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849001.644, "dur": 4.457, + "args": { + "External id": 502950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849003.495, "dur": 2.020, + "args": { + "External id": 502951,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849004.648, "dur": 0.701, + "args": { + "External id": 502952,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141849007.447, "dur": 44.130, + "args": { + "External id": 502953,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849052.709, "dur": 52.641, + "args": { + "External id": 502954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849053.466, "dur": 50.636, + "args": { + "External id": 502955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849102.225, "dur": 1.386, + "args": { + "External id": 502956,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849107.157, "dur": 6.722, + "args": { + "External id": 502957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849112.075, "dur": 1.238, + "args": { + "External id": 502958,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849112.762, "dur": 0.458, + "args": { + "External id": 502959,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141849114.666, "dur": 46.813, + "args": { + "External id": 502960,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849178.727, "dur": 11.720, + "args": { + "External id": 502961,"Record function id": 0, "Sequence number": 5516163, "Fwd thread id": 1, "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849179.919, "dur": 8.750, + "args": { + "External id": 502962,"Sequence number": 5516163, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 689 + } + }, + { + "ph": "f", "id": 71, "pid": 2338711, "tid": 2379440, "ts": 6274141849179.919, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849184.035, "dur": 4.483, + "args": { + "External id": 502963,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849186.937, "dur": 1.455, + "args": { + "External id": 502964,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849193.787, "dur": 6.786, + "args": { + "External id": 502965,"Record function id": 0, "Sequence number": 5516162, "Fwd thread id": 1, "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849194.680, "dur": 3.787, + "args": { + "External id": 502966,"Sequence number": 5516162, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 693 + } + }, + { + "ph": "f", "id": 72, "pid": 2338711, "tid": 2379440, "ts": 6274141849194.680, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849195.844, "dur": 2.370, + "args": { + "External id": 502967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849196.486, "dur": 1.250, + "args": { + "External id": 502968,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849197.267, "dur": 0.379, + "args": { + "External id": 502969,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141849204.664, "dur": 9.129, + "args": { + "External id": 502970,"Record function id": 0, "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141849205.731, "dur": 7.547, + "args": { + "External id": 502971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141849207.327, "dur": 5.670, + "args": { + "External id": 502972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141849211.131, "dur": 1.739, + "args": { + "External id": 502973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849216.961, "dur": 8.474, + "args": { + "External id": 502974,"Record function id": 0, "Sequence number": 5516161, "Fwd thread id": 1, "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849217.797, "dur": 5.173, + "args": { + "External id": 502975,"Sequence number": 5516161, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 702 + } + }, + { + "ph": "f", "id": 73, "pid": 2338711, "tid": 2379440, "ts": 6274141849217.797, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849219.180, "dur": 3.641, + "args": { + "External id": 502976,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849221.979, "dur": 0.709, + "args": { + "External id": 502977,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849228.674, "dur": 102.743, + "args": { + "External id": 502978,"Record function id": 0, "Sequence number": 5516160, "Fwd thread id": 1, "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849229.378, "dur": 94.245, + "args": { + "External id": 502979,"Sequence number": 5516160, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 706 + } + }, + { + "ph": "f", "id": 74, "pid": 2338711, "tid": 2379440, "ts": 6274141849229.378, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849232.321, "dur": 5.332, + "args": { + "External id": 502980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849232.989, "dur": 4.251, + "args": { + "External id": 502981,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849236.610, "dur": 0.525, + "args": { + "External id": 502982,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141849238.574, "dur": 34.685, + "args": { + "External id": 502983,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849274.459, "dur": 3.053, + "args": { + "External id": 502984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849275.117, "dur": 1.887, + "args": { + "External id": 502985,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849276.264, "dur": 0.635, + "args": { + "External id": 502986,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849278.756, "dur": 10.680, + "args": { + "External id": 502987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849282.340, "dur": 6.583, + "args": { + "External id": 502988,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849285.462, "dur": 3.395, + "args": { + "External id": 502989,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141849290.308, "dur": 32.639, + "args": { + "External id": 502990,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849336.036, "dur": 32.222, + "args": { + "External id": 502991,"Record function id": 0, "Sequence number": 5516159, "Fwd thread id": 1, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849337.115, "dur": 3.883, + "args": { + "External id": 502992,"Sequence number": 5516159, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 719 + } + }, + { + "ph": "f", "id": 75, "pid": 2338711, "tid": 2379440, "ts": 6274141849337.115, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849338.693, "dur": 2.161, + "args": { + "External id": 502993,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849339.602, "dur": 1.113, + "args": { + "External id": 502994,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141849344.077, "dur": 21.085, + "args": { + "External id": 502995,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849371.761, "dur": 8.962, + "args": { + "External id": 502996,"Record function id": 0, "Sequence number": 5516158, "Fwd thread id": 1, "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849372.860, "dur": 6.082, + "args": { + "External id": 502997,"Sequence number": 5516158, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 724 + } + }, + { + "ph": "f", "id": 76, "pid": 2338711, "tid": 2379440, "ts": 6274141849372.860, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849376.403, "dur": 2.317, + "args": { + "External id": 502998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849376.957, "dur": 1.293, + "args": { + "External id": 502999,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849377.755, "dur": 0.382, + "args": { + "External id": 503000,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141849386.529, "dur": 4.980, + "args": { + "External id": 503001,"Record function id": 0, "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141849387.956, "dur": 3.066, + "args": { + "External id": 503002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141849388.787, "dur": 1.965, + "args": { + "External id": 503003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141849389.243, "dur": 1.404, + "args": { + "External id": 503004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141849398.162, "dur": 488.234, + "args": { + "External id": 503005,"Record function id": 0, "Sequence number": 5516157, "Fwd thread id": 1, "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141849399.360, "dur": 451.833, + "args": { + "External id": 503006,"Sequence number": 5516157, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 733 + } + }, + { + "ph": "f", "id": 77, "pid": 2338711, "tid": 2379440, "ts": 6274141849399.360, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274141849424.567, "dur": 33.538, + "args": { + "External id": 503007,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141849426.366, "dur": 31.540, + "args": { + "External id": 503008,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141849429.046, "dur": 6.333, + "args": { + "External id": 503009,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141849431.616, "dur": 3.230, + "args": { + "External id": 503010,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141849436.676, "dur": 20.555, + "args": { + "External id": 503011,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849473.916, "dur": 3.691, + "args": { + "External id": 503012,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849476.400, "dur": 1.039, + "args": { + "External id": 503013,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849481.698, "dur": 4.019, + "args": { + "External id": 503014,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849482.172, "dur": 3.453, + "args": { + "External id": 503015,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141849499.703, "dur": 2.679, + "args": { + "External id": 503016,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141849513.882, "dur": 2.225, + "args": { + "External id": 503017,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849723.171, "dur": 3.787, + "args": { + "External id": 503018,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141849731.545, "dur": 41.280, + "args": { + "External id": 503019,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849748.638, "dur": 1.087, + "args": { + "External id": 503020,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141849778.940, "dur": 31.391, + "args": { + "External id": 503021,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141849781.094, "dur": 29.031, + "args": { + "External id": 503022,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849785.684, "dur": 3.916, + "args": { + "External id": 503023,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141849793.947, "dur": 15.673, + "args": { + "External id": 503024,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141849815.587, "dur": 4.527, + "args": { + "External id": 503025,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849819.140, "dur": 0.835, + "args": { + "External id": 503026,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849827.068, "dur": 1.960, + "args": { + "External id": 503027,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849827.838, "dur": 1.059, + "args": { + "External id": 503028,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849830.994, "dur": 7.643, + "args": { + "External id": 503029,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849834.240, "dur": 4.309, + "args": { + "External id": 503030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141849867.317, "dur": 17.515, + "args": { + "External id": 503031,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141849902.267, "dur": 10.212, + "args": { + "External id": 503032,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141849904.768, "dur": 6.964, + "args": { + "External id": 503033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141849907.288, "dur": 3.481, + "args": { + "External id": 503034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141849908.741, "dur": 1.872, + "args": { + "External id": 503035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849916.502, "dur": 10.250, + "args": { + "External id": 503036,"Record function id": 0, "Sequence number": 5516156, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849917.218, "dur": 6.932, + "args": { + "External id": 503037,"Sequence number": 5516156, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 78, "pid": 2338711, "tid": 2379440, "ts": 6274141849917.218, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141849919.509, "dur": 4.432, + "args": { + "External id": 503038,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141849922.779, "dur": 1.067, + "args": { + "External id": 503039,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849930.365, "dur": 185.729, + "args": { + "External id": 503040,"Record function id": 0, "Sequence number": 5516155, "Fwd thread id": 1, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141849931.189, "dur": 175.243, + "args": { + "External id": 503041,"Sequence number": 5516155, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 768 + } + }, + { + "ph": "f", "id": 79, "pid": 2338711, "tid": 2379440, "ts": 6274141849931.189, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141849935.333, "dur": 5.248, + "args": { + "External id": 503042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141849937.424, "dur": 2.543, + "args": { + "External id": 503043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141849938.897, "dur": 0.891, + "args": { + "External id": 503044,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141849941.681, "dur": 71.787, + "args": { + "External id": 503045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141850014.893, "dur": 8.220, + "args": { + "External id": 503046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141850018.287, "dur": 4.221, + "args": { + "External id": 503047,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850021.414, "dur": 0.980, + "args": { + "External id": 503048,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141850024.735, "dur": 2.954, + "args": { + "External id": 503049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141850025.697, "dur": 1.519, + "args": { + "External id": 503050,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850026.641, "dur": 0.508, + "args": { + "External id": 503051,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141850028.323, "dur": 76.495, + "args": { + "External id": 503052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141850124.412, "dur": 15.382, + "args": { + "External id": 503053,"Record function id": 0, "Sequence number": 5516154, "Fwd thread id": 1, "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141850125.714, "dur": 12.014, + "args": { + "External id": 503054,"Sequence number": 5516154, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 781 + } + }, + { + "ph": "f", "id": 80, "pid": 2338711, "tid": 2379440, "ts": 6274141850125.714, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141850127.359, "dur": 10.223, + "args": { + "External id": 503055,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141850133.388, "dur": 4.054, + "args": { + "External id": 503056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141850145.515, "dur": 6.686, + "args": { + "External id": 503057,"Record function id": 0, "Sequence number": 5516153, "Fwd thread id": 1, "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141850146.243, "dur": 4.271, + "args": { + "External id": 503058,"Sequence number": 5516153, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 785 + } + }, + { + "ph": "f", "id": 81, "pid": 2338711, "tid": 2379440, "ts": 6274141850146.243, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141850146.981, "dur": 3.311, + "args": { + "External id": 503059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141850148.114, "dur": 1.648, + "args": { + "External id": 503060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850149.188, "dur": 0.459, + "args": { + "External id": 503061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141850156.100, "dur": 6.728, + "args": { + "External id": 503062,"Record function id": 0, "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141850157.621, "dur": 4.679, + "args": { + "External id": 503063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141850159.221, "dur": 2.780, + "args": { + "External id": 503064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141850160.375, "dur": 1.520, + "args": { + "External id": 503065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141850165.992, "dur": 8.628, + "args": { + "External id": 503066,"Record function id": 0, "Sequence number": 5516152, "Fwd thread id": 1, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141850169.649, "dur": 3.079, + "args": { + "External id": 503067,"Sequence number": 5516152, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 794 + } + }, + { + "ph": "f", "id": 82, "pid": 2338711, "tid": 2379440, "ts": 6274141850169.649, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141850170.808, "dur": 1.765, + "args": { + "External id": 503068,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141850171.208, "dur": 1.232, + "args": { + "External id": 503069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141850179.077, "dur": 372.550, + "args": { + "External id": 503070,"Record function id": 0, "Sequence number": 5516151, "Fwd thread id": 1, "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141850182.756, "dur": 351.466, + "args": { + "External id": 503071,"Sequence number": 5516151, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 798 + } + }, + { + "ph": "f", "id": 83, "pid": 2338711, "tid": 2379440, "ts": 6274141850182.756, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141850200.956, "dur": 7.565, + "args": { + "External id": 503072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850203.432, "dur": 4.596, + "args": { + "External id": 503073,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141850210.776, "dur": 6.636, + "args": { + "External id": 503074,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850214.033, "dur": 3.205, + "args": { + "External id": 503075,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141850218.892, "dur": 2.916, + "args": { + "External id": 503076,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850219.709, "dur": 1.945, + "args": { + "External id": 503077,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141850252.173, "dur": 256.370, + "args": { + "External id": 503078,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141850336.068, "dur": 3.397, + "args": { + "External id": 503079,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141850343.690, "dur": 4.731, + "args": { + "External id": 503080,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141850351.756, "dur": 2.922, + "args": { + "External id": 503081,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141850355.818, "dur": 1.277, + "args": { + "External id": 503082,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141850405.668, "dur": 2.753, + "args": { + "External id": 503083,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141850407.098, "dur": 1.182, + "args": { + "External id": 503084,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141850409.883, "dur": 33.040, + "args": { + "External id": 503085,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850419.220, "dur": 0.884, + "args": { + "External id": 503086,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141850443.950, "dur": 1.163, + "args": { + "External id": 503087,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141850444.517, "dur": 0.515, + "args": { + "External id": 503088,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141850446.123, "dur": 16.650, + "args": { + "External id": 503089,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850447.238, "dur": 2.047, + "args": { + "External id": 503090,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141850522.100, "dur": 3.356, + "args": { + "External id": 503091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141850528.219, "dur": 0.725, + "args": { + "External id": 503092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141850530.849, "dur": 0.553, + "args": { + "External id": 503093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141850558.974, "dur": 283.625, + "args": { + "External id": 503094,"Record function id": 0, "Sequence number": 5516150, "Fwd thread id": 1, "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141850560.683, "dur": 273.027, + "args": { + "External id": 503095,"Sequence number": 5516150, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 822 + } + }, + { + "ph": "f", "id": 84, "pid": 2338711, "tid": 2379440, "ts": 6274141850560.683, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141850582.505, "dur": 83.688, + "args": { + "External id": 503096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850611.672, "dur": 6.491, + "args": { + "External id": 503097,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141850620.597, "dur": 44.713, + "args": { + "External id": 503098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141850680.396, "dur": 4.573, + "args": { + "External id": 503099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850681.963, "dur": 2.713, + "args": { + "External id": 503100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141850853.041, "dur": 175.953, + "args": { + "External id": 503101,"Record function id": 0, "Sequence number": 5516149, "Fwd thread id": 1, "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141850854.894, "dur": 166.193, + "args": { + "External id": 503102,"Sequence number": 5516149, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 829 + } + }, + { + "ph": "f", "id": 85, "pid": 2338711, "tid": 2379440, "ts": 6274141850854.894, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141850869.535, "dur": 49.658, + "args": { + "External id": 503103,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850874.493, "dur": 6.468, + "args": { + "External id": 503104,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141850882.258, "dur": 36.299, + "args": { + "External id": 503105,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141850926.835, "dur": 4.342, + "args": { + "External id": 503106,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141850928.710, "dur": 2.172, + "args": { + "External id": 503107,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851035.463, "dur": 15.899, + "args": { + "External id": 503108,"Record function id": 0, "Sequence number": 5516148, "Fwd thread id": 1, "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851037.208, "dur": 11.605, + "args": { + "External id": 503109,"Sequence number": 5516148, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 836 + } + }, + { + "ph": "f", "id": 86, "pid": 2338711, "tid": 2379440, "ts": 6274141851037.208, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851039.759, "dur": 8.807, + "args": { + "External id": 503110,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851043.535, "dur": 4.820, + "args": { + "External id": 503111,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851054.989, "dur": 45.787, + "args": { + "External id": 503112,"Record function id": 0, "Sequence number": 5516147, "Fwd thread id": 1, "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851092.875, "dur": 4.667, + "args": { + "External id": 503113,"Sequence number": 5516147, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 840 + } + }, + { + "ph": "f", "id": 87, "pid": 2338711, "tid": 2379440, "ts": 6274141851092.875, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851094.297, "dur": 2.859, + "args": { + "External id": 503114,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851095.202, "dur": 1.698, + "args": { + "External id": 503115,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851108.035, "dur": 9.203, + "args": { + "External id": 503116,"Record function id": 0, "Sequence number": 5516146, "Fwd thread id": 1, "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851111.113, "dur": 4.460, + "args": { + "External id": 503117,"Sequence number": 5516146, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 844 + } + }, + { + "ph": "f", "id": 88, "pid": 2338711, "tid": 2379440, "ts": 6274141851111.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851111.863, "dur": 3.579, + "args": { + "External id": 503118,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851114.546, "dur": 0.800, + "args": { + "External id": 503119,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851120.752, "dur": 7.230, + "args": { + "External id": 503120,"Record function id": 0, "Sequence number": 5516145, "Fwd thread id": 1, "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851121.619, "dur": 3.651, + "args": { + "External id": 503121,"Sequence number": 5516145, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 848 + } + }, + { + "ph": "f", "id": 89, "pid": 2338711, "tid": 2379440, "ts": 6274141851121.619, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851122.986, "dur": 2.143, + "args": { + "External id": 503122,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851123.623, "dur": 1.403, + "args": { + "External id": 503123,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851131.217, "dur": 167.754, + "args": { + "External id": 503124,"Record function id": 0, "Sequence number": 5516144, "Fwd thread id": 1, "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851131.929, "dur": 159.064, + "args": { + "External id": 503125,"Sequence number": 5516144, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 852 + } + }, + { + "ph": "f", "id": 90, "pid": 2338711, "tid": 2379440, "ts": 6274141851131.929, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851138.348, "dur": 6.793, + "args": { + "External id": 503126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851140.490, "dur": 3.991, + "args": { + "External id": 503127,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851142.613, "dur": 1.589, + "args": { + "External id": 503128,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141851146.856, "dur": 77.751, + "args": { + "External id": 503129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851226.077, "dur": 12.073, + "args": { + "External id": 503130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851226.923, "dur": 10.367, + "args": { + "External id": 503131,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851233.110, "dur": 4.025, + "args": { + "External id": 503132,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851240.097, "dur": 2.886, + "args": { + "External id": 503133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851241.417, "dur": 1.140, + "args": { + "External id": 503134,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851242.043, "dur": 0.440, + "args": { + "External id": 503135,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141851243.710, "dur": 46.381, + "args": { + "External id": 503136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851304.326, "dur": 11.536, + "args": { + "External id": 503137,"Record function id": 0, "Sequence number": 5516143, "Fwd thread id": 1, "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851305.295, "dur": 8.783, + "args": { + "External id": 503138,"Sequence number": 5516143, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 865 + } + }, + { + "ph": "f", "id": 91, "pid": 2338711, "tid": 2379440, "ts": 6274141851305.295, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851306.901, "dur": 7.033, + "args": { + "External id": 503139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851312.631, "dur": 1.158, + "args": { + "External id": 503140,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851319.344, "dur": 7.400, + "args": { + "External id": 503141,"Record function id": 0, "Sequence number": 5516142, "Fwd thread id": 1, "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851320.024, "dur": 4.140, + "args": { + "External id": 503142,"Sequence number": 5516142, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 869 + } + }, + { + "ph": "f", "id": 92, "pid": 2338711, "tid": 2379440, "ts": 6274141851320.024, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851320.991, "dur": 2.920, + "args": { + "External id": 503143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851321.711, "dur": 1.753, + "args": { + "External id": 503144,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851322.820, "dur": 0.502, + "args": { + "External id": 503145,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141851332.727, "dur": 12.604, + "args": { + "External id": 503146,"Record function id": 0, "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141851334.230, "dur": 10.174, + "args": { + "External id": 503147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141851337.210, "dur": 6.774, + "args": { + "External id": 503148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141851341.196, "dur": 2.672, + "args": { + "External id": 503149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851348.619, "dur": 7.985, + "args": { + "External id": 503150,"Record function id": 0, "Sequence number": 5516141, "Fwd thread id": 1, "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851349.643, "dur": 5.221, + "args": { + "External id": 503151,"Sequence number": 5516141, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 878 + } + }, + { + "ph": "f", "id": 93, "pid": 2338711, "tid": 2379440, "ts": 6274141851349.643, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851350.839, "dur": 3.892, + "args": { + "External id": 503152,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851353.653, "dur": 0.936, + "args": { + "External id": 503153,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851359.781, "dur": 103.931, + "args": { + "External id": 503154,"Record function id": 0, "Sequence number": 5516140, "Fwd thread id": 1, "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851360.642, "dur": 96.517, + "args": { + "External id": 503155,"Sequence number": 5516140, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 882 + } + }, + { + "ph": "f", "id": 94, "pid": 2338711, "tid": 2379440, "ts": 6274141851360.642, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851362.771, "dur": 5.100, + "args": { + "External id": 503156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851363.453, "dur": 3.985, + "args": { + "External id": 503157,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851366.729, "dur": 0.618, + "args": { + "External id": 503158,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141851368.644, "dur": 28.256, + "args": { + "External id": 503159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851397.871, "dur": 10.951, + "args": { + "External id": 503160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851404.349, "dur": 3.922, + "args": { + "External id": 503161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851405.446, "dur": 2.679, + "args": { + "External id": 503162,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851410.351, "dur": 7.905, + "args": { + "External id": 503163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851413.529, "dur": 4.346, + "args": { + "External id": 503164,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851416.914, "dur": 0.884, + "args": { + "External id": 503165,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141851418.751, "dur": 37.615, + "args": { + "External id": 503166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851468.404, "dur": 34.832, + "args": { + "External id": 503167,"Record function id": 0, "Sequence number": 5516139, "Fwd thread id": 1, "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851469.302, "dur": 3.403, + "args": { + "External id": 503168,"Sequence number": 5516139, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 895 + } + }, + { + "ph": "f", "id": 95, "pid": 2338711, "tid": 2379440, "ts": 6274141851469.302, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851470.823, "dur": 1.743, + "args": { + "External id": 503169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851471.405, "dur": 1.022, + "args": { + "External id": 503170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141851479.105, "dur": 21.474, + "args": { + "External id": 503171,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851509.481, "dur": 15.771, + "args": { + "External id": 503172,"Record function id": 0, "Sequence number": 5516138, "Fwd thread id": 1, "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851510.308, "dur": 12.925, + "args": { + "External id": 503173,"Sequence number": 5516138, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 900 + } + }, + { + "ph": "f", "id": 96, "pid": 2338711, "tid": 2379440, "ts": 6274141851510.308, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851513.675, "dur": 9.354, + "args": { + "External id": 503174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851514.532, "dur": 7.912, + "args": { + "External id": 503175,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851521.718, "dur": 0.601, + "args": { + "External id": 503176,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141851529.233, "dur": 8.111, + "args": { + "External id": 503177,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141851530.412, "dur": 6.354, + "args": { + "External id": 503178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141851531.744, "dur": 4.662, + "args": { + "External id": 503179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141851532.789, "dur": 3.504, + "args": { + "External id": 503180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851542.828, "dur": 8.646, + "args": { + "External id": 503181,"Record function id": 0, "Sequence number": 5516137, "Fwd thread id": 1, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851544.046, "dur": 5.485, + "args": { + "External id": 503182,"Sequence number": 5516137, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 909 + } + }, + { + "ph": "f", "id": 97, "pid": 2338711, "tid": 2379440, "ts": 6274141851544.046, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851545.190, "dur": 4.176, + "args": { + "External id": 503183,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851548.480, "dur": 0.773, + "args": { + "External id": 503184,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851554.886, "dur": 139.598, + "args": { + "External id": 503185,"Record function id": 0, "Sequence number": 5516136, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851555.846, "dur": 129.232, + "args": { + "External id": 503186,"Sequence number": 5516136, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 98, "pid": 2338711, "tid": 2379440, "ts": 6274141851555.846, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851558.116, "dur": 2.530, + "args": { + "External id": 503187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851558.759, "dur": 1.477, + "args": { + "External id": 503188,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851559.769, "dur": 0.359, + "args": { + "External id": 503189,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141851561.534, "dur": 59.870, + "args": { + "External id": 503190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851627.007, "dur": 8.889, + "args": { + "External id": 503191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851628.718, "dur": 6.157, + "args": { + "External id": 503192,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851633.715, "dur": 0.969, + "args": { + "External id": 503193,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851637.598, "dur": 3.764, + "args": { + "External id": 503194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851638.917, "dur": 1.823, + "args": { + "External id": 503195,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851640.091, "dur": 0.456, + "args": { + "External id": 503196,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141851644.560, "dur": 39.594, + "args": { + "External id": 503197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851701.698, "dur": 29.139, + "args": { + "External id": 503198,"Record function id": 0, "Sequence number": 5516135, "Fwd thread id": 1, "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851702.660, "dur": 5.828, + "args": { + "External id": 503199,"Sequence number": 5516135, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 926 + } + }, + { + "ph": "f", "id": 99, "pid": 2338711, "tid": 2379440, "ts": 6274141851702.660, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851704.069, "dur": 4.271, + "args": { + "External id": 503200,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851706.891, "dur": 1.262, + "args": { + "External id": 503201,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141851711.440, "dur": 17.322, + "args": { + "External id": 503202,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851734.490, "dur": 12.769, + "args": { + "External id": 503203,"Record function id": 0, "Sequence number": 5516134, "Fwd thread id": 1, "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141851735.621, "dur": 9.609, + "args": { + "External id": 503204,"Sequence number": 5516134, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 931 + } + }, + { + "ph": "f", "id": 100, "pid": 2338711, "tid": 2379440, "ts": 6274141851735.621, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141851736.835, "dur": 8.157, + "args": { + "External id": 503205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141851737.688, "dur": 6.771, + "args": { + "External id": 503206,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141851740.757, "dur": 3.570, + "args": { + "External id": 503207,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141851751.365, "dur": 6.139, + "args": { + "External id": 503208,"Record function id": 0, "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141851752.782, "dur": 4.228, + "args": { + "External id": 503209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141851754.110, "dur": 2.548, + "args": { + "External id": 503210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141851755.008, "dur": 1.537, + "args": { + "External id": 503211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141851761.860, "dur": 428.698, + "args": { + "External id": 503212,"Record function id": 0, "Sequence number": 5516133, "Fwd thread id": 1, "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141851762.851, "dur": 397.284, + "args": { + "External id": 503213,"Sequence number": 5516133, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 940 + } + }, + { + "ph": "f", "id": 101, "pid": 2338711, "tid": 2379440, "ts": 6274141851762.851, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141851804.535, "dur": 1.809, + "args": { + "External id": 503214,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141851805.225, "dur": 0.976, + "args": { + "External id": 503215,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141851821.147, "dur": 4.711, + "args": { + "External id": 503216,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141851838.685, "dur": 2.638, + "args": { + "External id": 503217,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852004.339, "dur": 1.386, + "args": { + "External id": 503218,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141852009.631, "dur": 37.319, + "args": { + "External id": 503219,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852023.050, "dur": 1.048, + "args": { + "External id": 503220,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141852052.617, "dur": 74.714, + "args": { + "External id": 503221,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141852054.574, "dur": 72.546, + "args": { + "External id": 503222,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852097.330, "dur": 7.182, + "args": { + "External id": 503223,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141852106.838, "dur": 19.776, + "args": { + "External id": 503224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141852135.659, "dur": 5.484, + "args": { + "External id": 503225,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852136.857, "dur": 4.134, + "args": { + "External id": 503226,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852150.483, "dur": 1.606, + "args": { + "External id": 503227,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852151.174, "dur": 0.816, + "args": { + "External id": 503228,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141852171.383, "dur": 15.204, + "args": { + "External id": 503229,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141852203.166, "dur": 12.402, + "args": { + "External id": 503230,"Record function id": 0, "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141852205.085, "dur": 9.752, + "args": { + "External id": 503231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141852207.840, "dur": 5.916, + "args": { + "External id": 503232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141852211.948, "dur": 1.688, + "args": { + "External id": 503233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852219.091, "dur": 5.804, + "args": { + "External id": 503234,"Record function id": 0, "Sequence number": 5516132, "Fwd thread id": 1, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852220.593, "dur": 1.336, + "args": { + "External id": 503235,"Sequence number": 5516132, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 962 + } + }, + { + "ph": "f", "id": 102, "pid": 2338711, "tid": 2379440, "ts": 6274141852220.593, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141852228.787, "dur": 459.608, + "args": { + "External id": 503236,"Record function id": 0, "Sequence number": 5516131, "Fwd thread id": 1, "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141852230.091, "dur": 445.499, + "args": { + "External id": 503237,"Sequence number": 5516131, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 964 + } + }, + { + "ph": "f", "id": 103, "pid": 2338711, "tid": 2379440, "ts": 6274141852230.091, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852262.515, "dur": 11.268, + "args": { + "External id": 503238,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141852270.134, "dur": 3.418, + "args": { + "External id": 503239,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852276.845, "dur": 6.759, + "args": { + "External id": 503240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852280.545, "dur": 2.431, + "args": { + "External id": 503241,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852282.015, "dur": 0.806, + "args": { + "External id": 503242,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274141852287.372, "dur": 90.573, + "args": { + "External id": 503243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852288.649, "dur": 2.336, + "args": { + "External id": 503244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852289.114, "dur": 1.402, + "args": { + "External id": 503245,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852289.822, "dur": 0.582, + "args": { + "External id": 503246,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274141852294.417, "dur": 83.133, + "args": { + "External id": 503247,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141852295.996, "dur": 80.727, + "args": { + "External id": 503248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141852381.834, "dur": 4.581, + "args": { + "External id": 503249,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852385.168, "dur": 1.112, + "args": { + "External id": 503250,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141852420.527, "dur": 5.609, + "args": { + "External id": 503251,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141852426.977, "dur": 4.113, + "args": { + "External id": 503252,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141852431.900, "dur": 1.579, + "args": { + "External id": 503253,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852470.142, "dur": 2.172, + "args": { + "External id": 503254,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852471.040, "dur": 1.035, + "args": { + "External id": 503255,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274141852493.776, "dur": 158.756, + "args": { + "External id": 503256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141852498.941, "dur": 4.838, + "args": { + "External id": 503257,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852502.183, "dur": 0.784, + "args": { + "External id": 503258,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141852507.374, "dur": 5.704, + "args": { + "External id": 503259,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852511.712, "dur": 0.504, + "args": { + "External id": 503260,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141852514.599, "dur": 3.911, + "args": { + "External id": 503261,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852517.814, "dur": 0.400, + "args": { + "External id": 503262,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141852519.005, "dur": 4.078, + "args": { + "External id": 503263,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852521.959, "dur": 0.585, + "args": { + "External id": 503264,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141852528.803, "dur": 4.290, + "args": { + "External id": 503265,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852532.404, "dur": 0.383, + "args": { + "External id": 503266,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852534.027, "dur": 8.734, + "args": { + "External id": 503267,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141852538.757, "dur": 3.826, + "args": { + "External id": 503268,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141852543.540, "dur": 1.570, + "args": { + "External id": 503269,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852544.472, "dur": 0.380, + "args": { + "External id": 503270,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852547.801, "dur": 3.185, + "args": { + "External id": 503271,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852548.568, "dur": 2.333, + "args": { + "External id": 503272,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274141852552.337, "dur": 78.701, + "args": { + "External id": 503273,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852637.101, "dur": 2.512, + "args": { + "External id": 503274,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141852640.848, "dur": 6.012, + "args": { + "External id": 503275,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852644.985, "dur": 1.201, + "args": { + "External id": 503276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852649.799, "dur": 1.277, + "args": { + "External id": 503277,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141852700.872, "dur": 9.223, + "args": { + "External id": 503278,"Record function id": 0, "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141852702.833, "dur": 6.447, + "args": { + "External id": 503279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141852705.125, "dur": 3.135, + "args": { + "External id": 503280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141852706.162, "dur": 1.963, + "args": { + "External id": 503281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852713.772, "dur": 11.630, + "args": { + "External id": 503282,"Record function id": 0, "Sequence number": 5516130, "Fwd thread id": 1, "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852714.929, "dur": 8.362, + "args": { + "External id": 503283,"Sequence number": 5516130, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1010 + } + }, + { + "ph": "f", "id": 104, "pid": 2338711, "tid": 2379440, "ts": 6274141852714.929, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852717.005, "dur": 6.098, + "args": { + "External id": 503284,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852722.040, "dur": 0.939, + "args": { + "External id": 503285,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852728.697, "dur": 147.506, + "args": { + "External id": 503286,"Record function id": 0, "Sequence number": 5516129, "Fwd thread id": 1, "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852729.515, "dur": 140.265, + "args": { + "External id": 503287,"Sequence number": 5516129, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1014 + } + }, + { + "ph": "f", "id": 105, "pid": 2338711, "tid": 2379440, "ts": 6274141852729.515, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852733.139, "dur": 6.509, + "args": { + "External id": 503288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852734.665, "dur": 4.413, + "args": { + "External id": 503289,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852735.777, "dur": 3.075, + "args": { + "External id": 503290,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141852743.554, "dur": 50.100, + "args": { + "External id": 503291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852794.795, "dur": 3.612, + "args": { + "External id": 503292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852795.556, "dur": 2.215, + "args": { + "External id": 503293,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852796.554, "dur": 1.066, + "args": { + "External id": 503294,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852799.843, "dur": 32.244, + "args": { + "External id": 503295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852801.090, "dur": 30.386, + "args": { + "External id": 503296,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852831.083, "dur": 0.307, + "args": { + "External id": 503297,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141852832.682, "dur": 36.165, + "args": { + "External id": 503298,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852883.427, "dur": 5.836, + "args": { + "External id": 503299,"Record function id": 0, "Sequence number": 5516128, "Fwd thread id": 1, "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852884.373, "dur": 3.387, + "args": { + "External id": 503300,"Sequence number": 5516128, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1027 + } + }, + { + "ph": "f", "id": 106, "pid": 2338711, "tid": 2379440, "ts": 6274141852884.373, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852885.877, "dur": 1.751, + "args": { + "External id": 503301,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852886.503, "dur": 1.030, + "args": { + "External id": 503302,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852892.588, "dur": 8.343, + "args": { + "External id": 503303,"Record function id": 0, "Sequence number": 5516127, "Fwd thread id": 1, "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852893.311, "dur": 5.777, + "args": { + "External id": 503304,"Sequence number": 5516127, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1031 + } + }, + { + "ph": "f", "id": 107, "pid": 2338711, "tid": 2379440, "ts": 6274141852893.311, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852894.090, "dur": 4.758, + "args": { + "External id": 503305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852894.808, "dur": 3.611, + "args": { + "External id": 503306,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852897.887, "dur": 0.430, + "args": { + "External id": 503307,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141852906.960, "dur": 6.624, + "args": { + "External id": 503308,"Record function id": 0, "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141852907.997, "dur": 4.982, + "args": { + "External id": 503309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141852908.912, "dur": 3.809, + "args": { + "External id": 503310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141852909.337, "dur": 3.260, + "args": { + "External id": 503311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852916.530, "dur": 5.296, + "args": { + "External id": 503312,"Record function id": 0, "Sequence number": 5516126, "Fwd thread id": 1, "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852917.257, "dur": 2.146, + "args": { + "External id": 503313,"Sequence number": 5516126, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1040 + } + }, + { + "ph": "f", "id": 108, "pid": 2338711, "tid": 2379440, "ts": 6274141852917.257, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141852917.975, "dur": 1.275, + "args": { + "External id": 503314,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141852918.410, "dur": 0.700, + "args": { + "External id": 503315,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852924.924, "dur": 94.121, + "args": { + "External id": 503316,"Record function id": 0, "Sequence number": 5516125, "Fwd thread id": 1, "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141852925.650, "dur": 86.336, + "args": { + "External id": 503317,"Sequence number": 5516125, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1044 + } + }, + { + "ph": "f", "id": 109, "pid": 2338711, "tid": 2379440, "ts": 6274141852925.650, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852930.137, "dur": 2.053, + "args": { + "External id": 503318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852930.621, "dur": 1.157, + "args": { + "External id": 503319,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852931.182, "dur": 0.495, + "args": { + "External id": 503320,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141852932.808, "dur": 36.743, + "args": { + "External id": 503321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852970.786, "dur": 5.771, + "args": { + "External id": 503322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852971.308, "dur": 4.753, + "args": { + "External id": 503323,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852975.185, "dur": 0.770, + "args": { + "External id": 503324,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141852977.315, "dur": 2.150, + "args": { + "External id": 503325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141852978.143, "dur": 0.958, + "args": { + "External id": 503326,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141852978.729, "dur": 0.302, + "args": { + "External id": 503327,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141852979.897, "dur": 31.329, + "args": { + "External id": 503328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853023.182, "dur": 75.647, + "args": { + "External id": 503329,"Record function id": 0, "Sequence number": 5516124, "Fwd thread id": 1, "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853024.245, "dur": 7.302, + "args": { + "External id": 503330,"Sequence number": 5516124, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1057 + } + }, + { + "ph": "f", "id": 110, "pid": 2338711, "tid": 2379440, "ts": 6274141853024.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853025.539, "dur": 5.872, + "args": { + "External id": 503331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853030.257, "dur": 1.046, + "args": { + "External id": 503332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141853034.815, "dur": 59.884, + "args": { + "External id": 503333,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853105.278, "dur": 8.733, + "args": { + "External id": 503334,"Record function id": 0, "Sequence number": 5516123, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853106.597, "dur": 5.279, + "args": { + "External id": 503335,"Sequence number": 5516123, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 111, "pid": 2338711, "tid": 2379440, "ts": 6274141853106.597, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141853107.845, "dur": 3.799, + "args": { + "External id": 503336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141853108.952, "dur": 2.064, + "args": { + "External id": 503337,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853110.292, "dur": 0.585, + "args": { + "External id": 503338,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141853117.956, "dur": 8.341, + "args": { + "External id": 503339,"Record function id": 0, "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141853119.081, "dur": 6.657, + "args": { + "External id": 503340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141853120.380, "dur": 5.005, + "args": { + "External id": 503341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141853123.685, "dur": 1.583, + "args": { + "External id": 503342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141853130.761, "dur": 434.540, + "args": { + "External id": 503343,"Record function id": 0, "Sequence number": 5516122, "Fwd thread id": 1, "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141853132.072, "dur": 398.993, + "args": { + "External id": 503344,"Sequence number": 5516122, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1071 + } + }, + { + "ph": "f", "id": 112, "pid": 2338711, "tid": 2379440, "ts": 6274141853132.072, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274141853159.693, "dur": 38.506, + "args": { + "External id": 503345,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141853161.372, "dur": 36.623, + "args": { + "External id": 503346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141853166.101, "dur": 6.809, + "args": { + "External id": 503347,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141853168.725, "dur": 3.655, + "args": { + "External id": 503348,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141853174.279, "dur": 23.227, + "args": { + "External id": 503349,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853210.906, "dur": 2.353, + "args": { + "External id": 503350,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853211.802, "dur": 1.321, + "args": { + "External id": 503351,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853217.255, "dur": 3.488, + "args": { + "External id": 503352,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853220.025, "dur": 0.619, + "args": { + "External id": 503353,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141853232.892, "dur": 2.763, + "args": { + "External id": 503354,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141853248.866, "dur": 2.135, + "args": { + "External id": 503355,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853417.005, "dur": 3.584, + "args": { + "External id": 503356,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141853424.953, "dur": 37.128, + "args": { + "External id": 503357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853438.095, "dur": 0.567, + "args": { + "External id": 503358,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141853468.229, "dur": 31.626, + "args": { + "External id": 503359,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141853472.406, "dur": 27.239, + "args": { + "External id": 503360,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853476.359, "dur": 5.585, + "args": { + "External id": 503361,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141853483.461, "dur": 15.481, + "args": { + "External id": 503362,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141853504.660, "dur": 2.288, + "args": { + "External id": 503363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853505.717, "dur": 1.086, + "args": { + "External id": 503364,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853513.348, "dur": 5.167, + "args": { + "External id": 503365,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853517.581, "dur": 0.849, + "args": { + "External id": 503366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853520.407, "dur": 1.451, + "args": { + "External id": 503367,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853520.815, "dur": 0.936, + "args": { + "External id": 503368,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141853545.491, "dur": 18.195, + "args": { + "External id": 503369,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141853575.130, "dur": 7.631, + "args": { + "External id": 503370,"Record function id": 0, "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141853576.930, "dur": 5.213, + "args": { + "External id": 503371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141853578.842, "dur": 2.369, + "args": { + "External id": 503372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141853579.882, "dur": 1.250, + "args": { + "External id": 503373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853609.861, "dur": 18.492, + "args": { + "External id": 503374,"Record function id": 0, "Sequence number": 5516121, "Fwd thread id": 1, "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853612.072, "dur": 11.864, + "args": { + "External id": 503375,"Sequence number": 5516121, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1102 + } + }, + { + "ph": "f", "id": 113, "pid": 2338711, "tid": 2379440, "ts": 6274141853612.072, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853614.402, "dur": 9.231, + "args": { + "External id": 503376,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853620.667, "dur": 2.627, + "args": { + "External id": 503377,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853633.564, "dur": 170.694, + "args": { + "External id": 503378,"Record function id": 0, "Sequence number": 5516120, "Fwd thread id": 1, "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853635.045, "dur": 161.879, + "args": { + "External id": 503379,"Sequence number": 5516120, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1106 + } + }, + { + "ph": "f", "id": 114, "pid": 2338711, "tid": 2379440, "ts": 6274141853635.045, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141853639.894, "dur": 6.496, + "args": { + "External id": 503380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141853641.612, "dur": 3.990, + "args": { + "External id": 503381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853644.123, "dur": 1.179, + "args": { + "External id": 503382,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141853650.540, "dur": 78.482, + "args": { + "External id": 503383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141853730.670, "dur": 3.978, + "args": { + "External id": 503384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141853731.610, "dur": 2.373, + "args": { + "External id": 503385,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853732.862, "dur": 0.975, + "args": { + "External id": 503386,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141853736.122, "dur": 20.376, + "args": { + "External id": 503387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141853747.231, "dur": 8.726, + "args": { + "External id": 503388,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853752.584, "dur": 3.302, + "args": { + "External id": 503389,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141853757.183, "dur": 39.027, + "args": { + "External id": 503390,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853811.969, "dur": 6.577, + "args": { + "External id": 503391,"Record function id": 0, "Sequence number": 5516119, "Fwd thread id": 1, "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853812.850, "dur": 3.562, + "args": { + "External id": 503392,"Sequence number": 5516119, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1119 + } + }, + { + "ph": "f", "id": 115, "pid": 2338711, "tid": 2379440, "ts": 6274141853812.850, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853814.391, "dur": 1.889, + "args": { + "External id": 503393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853814.909, "dur": 1.240, + "args": { + "External id": 503394,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853822.033, "dur": 10.418, + "args": { + "External id": 503395,"Record function id": 0, "Sequence number": 5516118, "Fwd thread id": 1, "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853822.811, "dur": 7.227, + "args": { + "External id": 503396,"Sequence number": 5516118, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1123 + } + }, + { + "ph": "f", "id": 116, "pid": 2338711, "tid": 2379440, "ts": 6274141853822.811, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141853823.396, "dur": 6.410, + "args": { + "External id": 503397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141853824.131, "dur": 5.168, + "args": { + "External id": 503398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853828.824, "dur": 0.390, + "args": { + "External id": 503399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141853836.608, "dur": 5.959, + "args": { + "External id": 503400,"Record function id": 0, "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141853837.726, "dur": 4.260, + "args": { + "External id": 503401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141853839.211, "dur": 2.423, + "args": { + "External id": 503402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141853840.174, "dur": 1.364, + "args": { + "External id": 503403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853845.471, "dur": 6.106, + "args": { + "External id": 503404,"Record function id": 0, "Sequence number": 5516117, "Fwd thread id": 1, "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141853846.202, "dur": 3.316, + "args": { + "External id": 503405,"Sequence number": 5516117, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1132 + } + }, + { + "ph": "f", "id": 117, "pid": 2338711, "tid": 2379440, "ts": 6274141853846.202, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141853847.375, "dur": 1.983, + "args": { + "External id": 503406,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141853848.038, "dur": 1.197, + "args": { + "External id": 503407,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141853855.592, "dur": 437.973, + "args": { + "External id": 503408,"Record function id": 0, "Sequence number": 5516116, "Fwd thread id": 1, "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141853856.718, "dur": 417.404, + "args": { + "External id": 503409,"Sequence number": 5516116, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1136 + } + }, + { + "ph": "f", "id": 118, "pid": 2338711, "tid": 2379440, "ts": 6274141853856.718, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141853874.185, "dur": 11.117, + "args": { + "External id": 503410,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853880.542, "dur": 4.283, + "args": { + "External id": 503411,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141853887.211, "dur": 2.966, + "args": { + "External id": 503412,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853888.067, "dur": 1.887, + "args": { + "External id": 503413,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141853894.656, "dur": 7.762, + "args": { + "External id": 503414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141853897.421, "dur": 4.800, + "args": { + "External id": 503415,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141853932.238, "dur": 312.631, + "args": { + "External id": 503416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141854013.577, "dur": 4.916, + "args": { + "External id": 503417,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141854020.254, "dur": 2.185, + "args": { + "External id": 503418,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141854026.152, "dur": 1.975, + "args": { + "External id": 503419,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141854029.168, "dur": 2.610, + "args": { + "External id": 503420,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141854132.990, "dur": 3.116, + "args": { + "External id": 503421,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141854134.033, "dur": 1.792, + "args": { + "External id": 503422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141854139.771, "dur": 31.574, + "args": { + "External id": 503423,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854145.159, "dur": 1.217, + "args": { + "External id": 503424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141854175.403, "dur": 1.364, + "args": { + "External id": 503425,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141854176.231, "dur": 0.454, + "args": { + "External id": 503426,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141854179.630, "dur": 16.070, + "args": { + "External id": 503427,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854182.594, "dur": 0.501, + "args": { + "External id": 503428,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141854260.088, "dur": 4.544, + "args": { + "External id": 503429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141854267.876, "dur": 0.660, + "args": { + "External id": 503430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141854270.588, "dur": 0.603, + "args": { + "External id": 503431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141854306.567, "dur": 249.116, + "args": { + "External id": 503432,"Record function id": 0, "Sequence number": 5516115, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141854308.301, "dur": 240.542, + "args": { + "External id": 503433,"Sequence number": 5516115, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 119, "pid": 2338711, "tid": 2379440, "ts": 6274141854308.301, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141854330.941, "dur": 52.066, + "args": { + "External id": 503434,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854334.045, "dur": 7.695, + "args": { + "External id": 503435,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141854343.547, "dur": 38.795, + "args": { + "External id": 503436,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141854394.197, "dur": 6.637, + "args": { + "External id": 503437,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854398.034, "dur": 2.373, + "args": { + "External id": 503438,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141854563.990, "dur": 228.320, + "args": { + "External id": 503439,"Record function id": 0, "Sequence number": 5516114, "Fwd thread id": 1, "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141854565.635, "dur": 218.675, + "args": { + "External id": 503440,"Sequence number": 5516114, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1167 + } + }, + { + "ph": "f", "id": 120, "pid": 2338711, "tid": 2379440, "ts": 6274141854565.635, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141854576.482, "dur": 77.827, + "args": { + "External id": 503441,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854579.227, "dur": 2.990, + "args": { + "External id": 503442,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141854583.484, "dur": 69.588, + "args": { + "External id": 503443,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141854667.563, "dur": 12.993, + "args": { + "External id": 503444,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854675.497, "dur": 4.720, + "args": { + "External id": 503445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854802.217, "dur": 15.768, + "args": { + "External id": 503446,"Record function id": 0, "Sequence number": 5516113, "Fwd thread id": 1, "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854804.310, "dur": 10.548, + "args": { + "External id": 503447,"Sequence number": 5516113, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1174 + } + }, + { + "ph": "f", "id": 121, "pid": 2338711, "tid": 2379440, "ts": 6274141854804.310, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141854807.139, "dur": 7.309, + "args": { + "External id": 503448,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141854808.464, "dur": 5.800, + "args": { + "External id": 503449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854821.977, "dur": 11.300, + "args": { + "External id": 503450,"Record function id": 0, "Sequence number": 5516112, "Fwd thread id": 1, "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854825.688, "dur": 5.728, + "args": { + "External id": 503451,"Sequence number": 5516112, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1178 + } + }, + { + "ph": "f", "id": 122, "pid": 2338711, "tid": 2379440, "ts": 6274141854825.688, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141854826.660, "dur": 4.619, + "args": { + "External id": 503452,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141854829.986, "dur": 1.153, + "args": { + "External id": 503453,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854836.577, "dur": 5.696, + "args": { + "External id": 503454,"Record function id": 0, "Sequence number": 5516111, "Fwd thread id": 1, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854837.690, "dur": 2.829, + "args": { + "External id": 503455,"Sequence number": 5516111, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1182 + } + }, + { + "ph": "f", "id": 123, "pid": 2338711, "tid": 2379440, "ts": 6274141854837.690, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141854838.548, "dur": 1.831, + "args": { + "External id": 503456,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141854839.377, "dur": 0.915, + "args": { + "External id": 503457,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854845.825, "dur": 9.447, + "args": { + "External id": 503458,"Record function id": 0, "Sequence number": 5516110, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854846.560, "dur": 6.786, + "args": { + "External id": 503459,"Sequence number": 5516110, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 124, "pid": 2338711, "tid": 2379440, "ts": 6274141854846.560, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141854849.123, "dur": 4.078, + "args": { + "External id": 503460,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141854851.839, "dur": 1.240, + "args": { + "External id": 503461,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854860.763, "dur": 175.526, + "args": { + "External id": 503462,"Record function id": 0, "Sequence number": 5516109, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141854861.779, "dur": 166.683, + "args": { + "External id": 503463,"Sequence number": 5516109, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 125, "pid": 2338711, "tid": 2379440, "ts": 6274141854861.779, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141854865.971, "dur": 8.979, + "args": { + "External id": 503464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141854868.247, "dur": 5.981, + "args": { + "External id": 503465,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854870.377, "dur": 3.497, + "args": { + "External id": 503466,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141854876.801, "dur": 78.659, + "args": { + "External id": 503467,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141854956.572, "dur": 13.346, + "args": { + "External id": 503468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141854964.899, "dur": 4.208, + "args": { + "External id": 503469,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854966.477, "dur": 2.469, + "args": { + "External id": 503470,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141854972.010, "dur": 5.230, + "args": { + "External id": 503471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141854973.377, "dur": 3.431, + "args": { + "External id": 503472,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141854976.282, "dur": 0.425, + "args": { + "External id": 503473,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141854977.877, "dur": 49.650, + "args": { + "External id": 503474,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855041.487, "dur": 8.794, + "args": { + "External id": 503475,"Record function id": 0, "Sequence number": 5516108, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855042.543, "dur": 6.240, + "args": { + "External id": 503476,"Sequence number": 5516108, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 126, "pid": 2338711, "tid": 2379440, "ts": 6274141855042.543, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141855044.199, "dur": 4.458, + "args": { + "External id": 503477,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855047.281, "dur": 1.269, + "args": { + "External id": 503478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855053.555, "dur": 50.655, + "args": { + "External id": 503479,"Record function id": 0, "Sequence number": 5516107, "Fwd thread id": 1, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855054.567, "dur": 46.532, + "args": { + "External id": 503480,"Sequence number": 5516107, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1207 + } + }, + { + "ph": "f", "id": 127, "pid": 2338711, "tid": 2379440, "ts": 6274141855054.567, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855055.384, "dur": 45.452, + "args": { + "External id": 503481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855095.321, "dur": 4.799, + "args": { + "External id": 503482,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855099.030, "dur": 0.804, + "args": { + "External id": 503483,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855111.897, "dur": 15.858, + "args": { + "External id": 503484,"Record function id": 0, "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855113.754, "dur": 13.083, + "args": { + "External id": 503485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855116.974, "dur": 9.364, + "args": { + "External id": 503486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855121.093, "dur": 5.068, + "args": { + "External id": 503487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855130.997, "dur": 5.869, + "args": { + "External id": 503488,"Record function id": 0, "Sequence number": 5516106, "Fwd thread id": 1, "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855132.001, "dur": 3.230, + "args": { + "External id": 503489,"Sequence number": 5516106, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1216 + } + }, + { + "ph": "f", "id": 128, "pid": 2338711, "tid": 2379440, "ts": 6274141855132.001, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141855133.215, "dur": 1.879, + "args": { + "External id": 503490,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855133.859, "dur": 1.142, + "args": { + "External id": 503491,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855140.031, "dur": 126.286, + "args": { + "External id": 503492,"Record function id": 0, "Sequence number": 5516105, "Fwd thread id": 1, "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855140.855, "dur": 117.994, + "args": { + "External id": 503493,"Sequence number": 5516105, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1220 + } + }, + { + "ph": "f", "id": 129, "pid": 2338711, "tid": 2379440, "ts": 6274141855140.855, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855143.710, "dur": 5.297, + "args": { + "External id": 503494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855144.486, "dur": 4.064, + "args": { + "External id": 503495,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855147.929, "dur": 0.510, + "args": { + "External id": 503496,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141855149.801, "dur": 36.765, + "args": { + "External id": 503497,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855187.629, "dur": 5.841, + "args": { + "External id": 503498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855188.335, "dur": 4.587, + "args": { + "External id": 503499,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855191.743, "dur": 1.066, + "args": { + "External id": 503500,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855194.593, "dur": 5.753, + "args": { + "External id": 503501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855198.546, "dur": 1.409, + "args": { + "External id": 503502,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855199.483, "dur": 0.405, + "args": { + "External id": 503503,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141855200.999, "dur": 57.049, + "args": { + "External id": 503504,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855270.798, "dur": 38.145, + "args": { + "External id": 503505,"Record function id": 0, "Sequence number": 5516104, "Fwd thread id": 1, "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855271.829, "dur": 5.901, + "args": { + "External id": 503506,"Sequence number": 5516104, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1233 + } + }, + { + "ph": "f", "id": 130, "pid": 2338711, "tid": 2379440, "ts": 6274141855271.829, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141855273.329, "dur": 4.255, + "args": { + "External id": 503507,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855276.147, "dur": 1.342, + "args": { + "External id": 503508,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141855281.080, "dur": 25.664, + "args": { + "External id": 503509,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855312.929, "dur": 8.746, + "args": { + "External id": 503510,"Record function id": 0, "Sequence number": 5516103, "Fwd thread id": 1, "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855315.796, "dur": 3.954, + "args": { + "External id": 503511,"Sequence number": 5516103, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1238 + } + }, + { + "ph": "f", "id": 131, "pid": 2338711, "tid": 2379440, "ts": 6274141855315.796, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855316.714, "dur": 2.822, + "args": { + "External id": 503512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855317.629, "dur": 1.401, + "args": { + "External id": 503513,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855318.513, "dur": 0.350, + "args": { + "External id": 503514,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855325.637, "dur": 4.640, + "args": { + "External id": 503515,"Record function id": 0, "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855326.863, "dur": 2.933, + "args": { + "External id": 503516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855327.947, "dur": 1.583, + "args": { + "External id": 503517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855328.462, "dur": 0.984, + "args": { + "External id": 503518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855333.457, "dur": 8.946, + "args": { + "External id": 503519,"Record function id": 0, "Sequence number": 5516102, "Fwd thread id": 1, "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855334.271, "dur": 5.715, + "args": { + "External id": 503520,"Sequence number": 5516102, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1247 + } + }, + { + "ph": "f", "id": 132, "pid": 2338711, "tid": 2379440, "ts": 6274141855334.271, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141855336.215, "dur": 3.618, + "args": { + "External id": 503521,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855338.896, "dur": 0.834, + "args": { + "External id": 503522,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855345.376, "dur": 138.128, + "args": { + "External id": 503523,"Record function id": 0, "Sequence number": 5516101, "Fwd thread id": 1, "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855346.153, "dur": 129.971, + "args": { + "External id": 503524,"Sequence number": 5516101, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1251 + } + }, + { + "ph": "f", "id": 133, "pid": 2338711, "tid": 2379440, "ts": 6274141855346.153, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855348.003, "dur": 2.483, + "args": { + "External id": 503525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855348.519, "dur": 1.571, + "args": { + "External id": 503526,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855349.325, "dur": 0.647, + "args": { + "External id": 503527,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141855353.787, "dur": 54.189, + "args": { + "External id": 503528,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855409.190, "dur": 2.909, + "args": { + "External id": 503529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855409.993, "dur": 1.537, + "args": { + "External id": 503530,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855410.772, "dur": 0.654, + "args": { + "External id": 503531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855413.253, "dur": 7.348, + "args": { + "External id": 503532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855414.534, "dur": 5.548, + "args": { + "External id": 503533,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855419.431, "dur": 0.580, + "args": { + "External id": 503534,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141855421.328, "dur": 54.200, + "args": { + "External id": 503535,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855488.031, "dur": 23.793, + "args": { + "External id": 503536,"Record function id": 0, "Sequence number": 5516100, "Fwd thread id": 1, "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855488.822, "dur": 3.503, + "args": { + "External id": 503537,"Sequence number": 5516100, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1264 + } + }, + { + "ph": "f", "id": 134, "pid": 2338711, "tid": 2379440, "ts": 6274141855488.822, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141855490.039, "dur": 2.152, + "args": { + "External id": 503538,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855490.693, "dur": 1.373, + "args": { + "External id": 503539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141855494.537, "dur": 15.110, + "args": { + "External id": 503540,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855515.471, "dur": 10.640, + "args": { + "External id": 503541,"Record function id": 0, "Sequence number": 5516099, "Fwd thread id": 1, "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855516.320, "dur": 7.919, + "args": { + "External id": 503542,"Sequence number": 5516099, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1269 + } + }, + { + "ph": "f", "id": 135, "pid": 2338711, "tid": 2379440, "ts": 6274141855516.320, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141855517.313, "dur": 6.710, + "args": { + "External id": 503543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141855518.113, "dur": 5.393, + "args": { + "External id": 503544,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855522.945, "dur": 0.443, + "args": { + "External id": 503545,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855529.898, "dur": 4.700, + "args": { + "External id": 503546,"Record function id": 0, "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855531.057, "dur": 3.074, + "args": { + "External id": 503547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855532.375, "dur": 1.416, + "args": { + "External id": 503548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855532.766, "dur": 0.921, + "args": { + "External id": 503549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141855540.977, "dur": 414.827, + "args": { + "External id": 503550,"Record function id": 0, "Sequence number": 5516098, "Fwd thread id": 1, "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141855542.304, "dur": 380.632, + "args": { + "External id": 503551,"Sequence number": 5516098, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1278 + } + }, + { + "ph": "f", "id": 136, "pid": 2338711, "tid": 2379440, "ts": 6274141855542.304, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141855578.154, "dur": 1.952, + "args": { + "External id": 503552,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855578.834, "dur": 1.129, + "args": { + "External id": 503553,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141855618.896, "dur": 6.655, + "args": { + "External id": 503554,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141855638.339, "dur": 3.076, + "args": { + "External id": 503555,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855809.680, "dur": 3.109, + "args": { + "External id": 503556,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141855816.707, "dur": 40.647, + "args": { + "External id": 503557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855831.028, "dur": 1.219, + "args": { + "External id": 503558,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141855863.124, "dur": 33.623, + "args": { + "External id": 503559,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141855865.105, "dur": 31.437, + "args": { + "External id": 503560,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141855871.019, "dur": 5.185, + "args": { + "External id": 503561,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141855877.907, "dur": 18.073, + "args": { + "External id": 503562,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141855904.661, "dur": 2.672, + "args": { + "External id": 503563,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855906.006, "dur": 1.186, + "args": { + "External id": 503564,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141855913.261, "dur": 2.220, + "args": { + "External id": 503565,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141855914.034, "dur": 1.317, + "args": { + "External id": 503566,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141855934.259, "dur": 17.009, + "args": { + "External id": 503567,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855968.969, "dur": 13.806, + "args": { + "External id": 503568,"Record function id": 0, "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141855973.047, "dur": 9.058, + "args": { + "External id": 503569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855975.194, "dur": 5.695, + "args": { + "External id": 503570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141855978.900, "dur": 1.913, + "args": { + "External id": 503571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855986.628, "dur": 5.552, + "args": { + "External id": 503572,"Record function id": 0, "Sequence number": 5516097, "Fwd thread id": 1, "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141855987.896, "dur": 1.452, + "args": { + "External id": 503573,"Sequence number": 5516097, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1300 + } + }, + { + "ph": "f", "id": 137, "pid": 2338711, "tid": 2379440, "ts": 6274141855987.896, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141855995.875, "dur": 506.082, + "args": { + "External id": 503574,"Record function id": 0, "Sequence number": 5516096, "Fwd thread id": 1, "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141855997.142, "dur": 492.967, + "args": { + "External id": 503575,"Sequence number": 5516096, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1302 + } + }, + { + "ph": "f", "id": 138, "pid": 2338711, "tid": 2379440, "ts": 6274141855997.142, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856026.531, "dur": 8.316, + "args": { + "External id": 503576,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141856031.184, "dur": 3.366, + "args": { + "External id": 503577,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856037.792, "dur": 8.063, + "args": { + "External id": 503578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856042.108, "dur": 3.094, + "args": { + "External id": 503579,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856044.319, "dur": 0.696, + "args": { + "External id": 503580,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274141856049.578, "dur": 150.131, + "args": { + "External id": 503581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856050.469, "dur": 2.246, + "args": { + "External id": 503582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856051.011, "dur": 1.272, + "args": { + "External id": 503583,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856051.645, "dur": 0.526, + "args": { + "External id": 503584,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274141856092.475, "dur": 106.565, + "args": { + "External id": 503585,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141856095.280, "dur": 102.723, + "args": { + "External id": 503586,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141856204.631, "dur": 5.539, + "args": { + "External id": 503587,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856207.870, "dur": 2.191, + "args": { + "External id": 503588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141856246.240, "dur": 6.654, + "args": { + "External id": 503589,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141856254.387, "dur": 4.380, + "args": { + "External id": 503590,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141856259.948, "dur": 1.931, + "args": { + "External id": 503591,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856299.011, "dur": 2.212, + "args": { + "External id": 503592,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856299.923, "dur": 1.148, + "args": { + "External id": 503593,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274141856322.574, "dur": 148.031, + "args": { + "External id": 503594,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141856328.223, "dur": 6.651, + "args": { + "External id": 503595,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856332.619, "dur": 1.009, + "args": { + "External id": 503596,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141856336.723, "dur": 7.180, + "args": { + "External id": 503597,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856342.698, "dur": 0.533, + "args": { + "External id": 503598,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274141856345.368, "dur": 3.395, + "args": { + "External id": 503599,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856348.078, "dur": 0.351, + "args": { + "External id": 503600,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141856349.848, "dur": 3.170, + "args": { + "External id": 503601,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856352.015, "dur": 0.454, + "args": { + "External id": 503602,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141856359.482, "dur": 6.169, + "args": { + "External id": 503603,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856362.098, "dur": 3.231, + "args": { + "External id": 503604,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856367.021, "dur": 6.330, + "args": { + "External id": 503605,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141856371.276, "dur": 1.854, + "args": { + "External id": 503606,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141856374.520, "dur": 3.185, + "args": { + "External id": 503607,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856376.983, "dur": 0.420, + "args": { + "External id": 503608,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856378.863, "dur": 3.658, + "args": { + "External id": 503609,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856379.883, "dur": 2.548, + "args": { + "External id": 503610,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274141856383.807, "dur": 72.145, + "args": { + "External id": 503611,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856458.043, "dur": 1.129, + "args": { + "External id": 503612,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274141856462.856, "dur": 3.322, + "args": { + "External id": 503613,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856465.005, "dur": 0.487, + "args": { + "External id": 503614,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856468.587, "dur": 0.821, + "args": { + "External id": 503615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141856514.168, "dur": 14.908, + "args": { + "External id": 503616,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141856516.582, "dur": 11.822, + "args": { + "External id": 503617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141856519.098, "dur": 8.437, + "args": { + "External id": 503618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141856523.509, "dur": 3.915, + "args": { + "External id": 503619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856532.705, "dur": 10.335, + "args": { + "External id": 503620,"Record function id": 0, "Sequence number": 5516095, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856533.996, "dur": 6.952, + "args": { + "External id": 503621,"Sequence number": 5516095, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 139, "pid": 2338711, "tid": 2379440, "ts": 6274141856533.996, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856538.783, "dur": 1.956, + "args": { + "External id": 503622,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856539.735, "dur": 0.852, + "args": { + "External id": 503623,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856546.762, "dur": 159.461, + "args": { + "External id": 503624,"Record function id": 0, "Sequence number": 5516094, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856547.566, "dur": 148.937, + "args": { + "External id": 503625,"Sequence number": 5516094, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 140, "pid": 2338711, "tid": 2379440, "ts": 6274141856547.566, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856550.899, "dur": 6.996, + "args": { + "External id": 503626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856552.457, "dur": 4.964, + "args": { + "External id": 503627,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856556.681, "dur": 0.567, + "args": { + "External id": 503628,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141856559.214, "dur": 75.379, + "args": { + "External id": 503629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856637.418, "dur": 6.509, + "args": { + "External id": 503630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856638.902, "dur": 3.925, + "args": { + "External id": 503631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856641.712, "dur": 0.895, + "args": { + "External id": 503632,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856645.807, "dur": 7.503, + "args": { + "External id": 503633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856647.114, "dur": 5.469, + "args": { + "External id": 503634,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856651.689, "dur": 0.713, + "args": { + "External id": 503635,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141856654.694, "dur": 41.109, + "args": { + "External id": 503636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856713.623, "dur": 7.900, + "args": { + "External id": 503637,"Record function id": 0, "Sequence number": 5516093, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856714.730, "dur": 5.477, + "args": { + "External id": 503638,"Sequence number": 5516093, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 141, "pid": 2338711, "tid": 2379440, "ts": 6274141856714.730, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856717.062, "dur": 3.007, + "args": { + "External id": 503639,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856718.135, "dur": 1.784, + "args": { + "External id": 503640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856724.724, "dur": 11.410, + "args": { + "External id": 503641,"Record function id": 0, "Sequence number": 5516092, "Fwd thread id": 1, "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856725.534, "dur": 8.968, + "args": { + "External id": 503642,"Sequence number": 5516092, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1369 + } + }, + { + "ph": "f", "id": 142, "pid": 2338711, "tid": 2379440, "ts": 6274141856725.534, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856726.411, "dur": 7.880, + "args": { + "External id": 503643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856727.219, "dur": 6.574, + "args": { + "External id": 503644,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856731.038, "dur": 2.642, + "args": { + "External id": 503645,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141856740.756, "dur": 7.205, + "args": { + "External id": 503646,"Record function id": 0, "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141856742.549, "dur": 4.819, + "args": { + "External id": 503647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141856744.174, "dur": 2.845, + "args": { + "External id": 503648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141856745.231, "dur": 1.675, + "args": { + "External id": 503649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856751.414, "dur": 6.448, + "args": { + "External id": 503650,"Record function id": 0, "Sequence number": 5516091, "Fwd thread id": 1, "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856752.855, "dur": 3.065, + "args": { + "External id": 503651,"Sequence number": 5516091, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1378 + } + }, + { + "ph": "f", "id": 143, "pid": 2338711, "tid": 2379440, "ts": 6274141856752.855, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856753.952, "dur": 1.827, + "args": { + "External id": 503652,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856754.828, "dur": 0.821, + "args": { + "External id": 503653,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856760.988, "dur": 98.327, + "args": { + "External id": 503654,"Record function id": 0, "Sequence number": 5516090, "Fwd thread id": 1, "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856764.149, "dur": 88.353, + "args": { + "External id": 503655,"Sequence number": 5516090, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1382 + } + }, + { + "ph": "f", "id": 144, "pid": 2338711, "tid": 2379440, "ts": 6274141856764.149, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856766.003, "dur": 3.579, + "args": { + "External id": 503656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856766.668, "dur": 2.439, + "args": { + "External id": 503657,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856768.415, "dur": 0.553, + "args": { + "External id": 503658,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141856770.254, "dur": 35.529, + "args": { + "External id": 503659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856806.906, "dur": 6.018, + "args": { + "External id": 503660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856807.719, "dur": 4.529, + "args": { + "External id": 503661,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856811.368, "dur": 0.771, + "args": { + "External id": 503662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856813.899, "dur": 2.936, + "args": { + "External id": 503663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856814.534, "dur": 1.816, + "args": { + "External id": 503664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856815.829, "dur": 0.456, + "args": { + "External id": 503665,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141856817.507, "dur": 34.260, + "args": { + "External id": 503666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856865.370, "dur": 38.323, + "args": { + "External id": 503667,"Record function id": 0, "Sequence number": 5516089, "Fwd thread id": 1, "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856866.488, "dur": 9.320, + "args": { + "External id": 503668,"Sequence number": 5516089, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1395 + } + }, + { + "ph": "f", "id": 145, "pid": 2338711, "tid": 2379440, "ts": 6274141856866.488, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141856870.542, "dur": 5.112, + "args": { + "External id": 503669,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141856872.125, "dur": 3.384, + "args": { + "External id": 503670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141856878.982, "dur": 21.427, + "args": { + "External id": 503671,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856907.362, "dur": 8.482, + "args": { + "External id": 503672,"Record function id": 0, "Sequence number": 5516088, "Fwd thread id": 1, "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141856908.539, "dur": 5.379, + "args": { + "External id": 503673,"Sequence number": 5516088, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1400 + } + }, + { + "ph": "f", "id": 146, "pid": 2338711, "tid": 2379440, "ts": 6274141856908.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141856909.467, "dur": 4.205, + "args": { + "External id": 503674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141856910.387, "dur": 2.712, + "args": { + "External id": 503675,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141856912.235, "dur": 0.754, + "args": { + "External id": 503676,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141856919.936, "dur": 7.564, + "args": { + "External id": 503677,"Record function id": 0, "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141856921.748, "dur": 5.248, + "args": { + "External id": 503678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141856922.769, "dur": 3.911, + "args": { + "External id": 503679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141856925.561, "dur": 1.019, + "args": { + "External id": 503680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141856931.773, "dur": 481.259, + "args": { + "External id": 503681,"Record function id": 0, "Sequence number": 5516087, "Fwd thread id": 1, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141856933.668, "dur": 442.337, + "args": { + "External id": 503682,"Sequence number": 5516087, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1409 + } + }, + { + "ph": "f", "id": 147, "pid": 2338711, "tid": 2379440, "ts": 6274141856933.668, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274141856960.249, "dur": 36.274, + "args": { + "External id": 503683,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141856962.352, "dur": 33.967, + "args": { + "External id": 503684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141856965.303, "dur": 6.704, + "args": { + "External id": 503685,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141856968.071, "dur": 3.409, + "args": { + "External id": 503686,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141856973.282, "dur": 22.290, + "args": { + "External id": 503687,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857007.569, "dur": 2.628, + "args": { + "External id": 503688,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857008.714, "dur": 1.334, + "args": { + "External id": 503689,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857016.930, "dur": 1.511, + "args": { + "External id": 503690,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857017.567, "dur": 0.783, + "args": { + "External id": 503691,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141857030.516, "dur": 2.523, + "args": { + "External id": 503692,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141857044.305, "dur": 4.579, + "args": { + "External id": 503693,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857259.610, "dur": 3.629, + "args": { + "External id": 503694,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141857268.012, "dur": 38.468, + "args": { + "External id": 503695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857281.798, "dur": 1.108, + "args": { + "External id": 503696,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141857313.319, "dur": 28.923, + "args": { + "External id": 503697,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141857315.153, "dur": 26.893, + "args": { + "External id": 503698,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857319.903, "dur": 4.256, + "args": { + "External id": 503699,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141857325.971, "dur": 15.519, + "args": { + "External id": 503700,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141857347.046, "dur": 2.778, + "args": { + "External id": 503701,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857348.532, "dur": 1.192, + "args": { + "External id": 503702,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857359.141, "dur": 2.822, + "args": { + "External id": 503703,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857360.723, "dur": 1.096, + "args": { + "External id": 503704,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857364.788, "dur": 2.123, + "args": { + "External id": 503705,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857365.670, "dur": 1.143, + "args": { + "External id": 503706,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141857393.761, "dur": 17.465, + "args": { + "External id": 503707,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141857427.278, "dur": 9.970, + "args": { + "External id": 503708,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141857429.889, "dur": 6.561, + "args": { + "External id": 503709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141857432.285, "dur": 3.356, + "args": { + "External id": 503710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141857433.590, "dur": 1.914, + "args": { + "External id": 503711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857441.381, "dur": 10.490, + "args": { + "External id": 503712,"Record function id": 0, "Sequence number": 5516086, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857442.655, "dur": 7.130, + "args": { + "External id": 503713,"Sequence number": 5516086, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 148, "pid": 2338711, "tid": 2379440, "ts": 6274141857442.655, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857447.361, "dur": 2.237, + "args": { + "External id": 503714,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857448.313, "dur": 1.120, + "args": { + "External id": 503715,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857455.453, "dur": 177.113, + "args": { + "External id": 503716,"Record function id": 0, "Sequence number": 5516085, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857456.190, "dur": 166.048, + "args": { + "External id": 503717,"Sequence number": 5516085, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 149, "pid": 2338711, "tid": 2379440, "ts": 6274141857456.190, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141857458.980, "dur": 7.964, + "args": { + "External id": 503718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141857460.413, "dur": 5.775, + "args": { + "External id": 503719,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857465.105, "dur": 0.876, + "args": { + "External id": 503720,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141857468.153, "dur": 72.893, + "args": { + "External id": 503721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141857542.410, "dur": 7.031, + "args": { + "External id": 503722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141857543.094, "dur": 5.523, + "args": { + "External id": 503723,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857545.061, "dur": 3.428, + "args": { + "External id": 503724,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141857550.809, "dur": 6.415, + "args": { + "External id": 503725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141857552.181, "dur": 4.553, + "args": { + "External id": 503726,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857556.158, "dur": 0.491, + "args": { + "External id": 503727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141857557.740, "dur": 63.013, + "args": { + "External id": 503728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857643.293, "dur": 11.949, + "args": { + "External id": 503729,"Record function id": 0, "Sequence number": 5516084, "Fwd thread id": 1, "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857645.253, "dur": 7.882, + "args": { + "External id": 503730,"Sequence number": 5516084, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1457 + } + }, + { + "ph": "f", "id": 150, "pid": 2338711, "tid": 2379440, "ts": 6274141857645.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857648.351, "dur": 4.563, + "args": { + "External id": 503731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857650.384, "dur": 2.319, + "args": { + "External id": 503732,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857659.375, "dur": 10.701, + "args": { + "External id": 503733,"Record function id": 0, "Sequence number": 5516083, "Fwd thread id": 1, "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857660.082, "dur": 8.022, + "args": { + "External id": 503734,"Sequence number": 5516083, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1461 + } + }, + { + "ph": "f", "id": 151, "pid": 2338711, "tid": 2379440, "ts": 6274141857660.082, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141857660.751, "dur": 7.164, + "args": { + "External id": 503735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141857664.442, "dur": 2.946, + "args": { + "External id": 503736,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857666.646, "dur": 0.630, + "args": { + "External id": 503737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141857674.536, "dur": 6.808, + "args": { + "External id": 503738,"Record function id": 0, "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141857676.505, "dur": 4.264, + "args": { + "External id": 503739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141857677.973, "dur": 2.522, + "args": { + "External id": 503740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141857678.637, "dur": 1.758, + "args": { + "External id": 503741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857684.238, "dur": 5.696, + "args": { + "External id": 503742,"Record function id": 0, "Sequence number": 5516082, "Fwd thread id": 1, "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141857684.969, "dur": 3.524, + "args": { + "External id": 503743,"Sequence number": 5516082, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1470 + } + }, + { + "ph": "f", "id": 152, "pid": 2338711, "tid": 2379440, "ts": 6274141857684.969, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857685.993, "dur": 2.349, + "args": { + "External id": 503744,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857686.933, "dur": 1.269, + "args": { + "External id": 503745,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141857693.965, "dur": 407.448, + "args": { + "External id": 503746,"Record function id": 0, "Sequence number": 5516081, "Fwd thread id": 1, "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141857695.819, "dur": 347.479, + "args": { + "External id": 503747,"Sequence number": 5516081, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1474 + } + }, + { + "ph": "f", "id": 153, "pid": 2338711, "tid": 2379440, "ts": 6274141857695.819, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141857714.810, "dur": 7.898, + "args": { + "External id": 503748,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857717.966, "dur": 4.262, + "args": { + "External id": 503749,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141857724.890, "dur": 6.643, + "args": { + "External id": 503750,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857726.172, "dur": 5.165, + "args": { + "External id": 503751,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141857733.067, "dur": 5.835, + "args": { + "External id": 503752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857735.242, "dur": 3.474, + "args": { + "External id": 503753,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141857768.784, "dur": 248.121, + "args": { + "External id": 503754,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141857852.416, "dur": 4.040, + "args": { + "External id": 503755,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141857858.276, "dur": 4.254, + "args": { + "External id": 503756,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141857863.996, "dur": 1.814, + "args": { + "External id": 503757,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141857866.950, "dur": 1.773, + "args": { + "External id": 503758,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857916.780, "dur": 4.998, + "args": { + "External id": 503759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857920.120, "dur": 1.546, + "args": { + "External id": 503760,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141857925.661, "dur": 27.443, + "args": { + "External id": 503761,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857931.546, "dur": 0.871, + "args": { + "External id": 503762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141857954.546, "dur": 1.405, + "args": { + "External id": 503763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141857955.429, "dur": 0.460, + "args": { + "External id": 503764,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141857956.867, "dur": 16.984, + "args": { + "External id": 503765,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141857958.872, "dur": 3.287, + "args": { + "External id": 503766,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141858030.572, "dur": 3.533, + "args": { + "External id": 503767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141858037.317, "dur": 0.614, + "args": { + "External id": 503768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274141858039.842, "dur": 0.741, + "args": { + "External id": 503769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141858113.108, "dur": 242.897, + "args": { + "External id": 503770,"Record function id": 0, "Sequence number": 5516080, "Fwd thread id": 1, "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141858115.473, "dur": 232.885, + "args": { + "External id": 503771,"Sequence number": 5516080, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1498 + } + }, + { + "ph": "f", "id": 154, "pid": 2338711, "tid": 2379440, "ts": 6274141858115.473, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141858138.233, "dur": 49.006, + "args": { + "External id": 503772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858142.267, "dur": 4.162, + "args": { + "External id": 503773,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141858148.129, "dur": 38.608, + "args": { + "External id": 503774,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141858198.365, "dur": 5.613, + "args": { + "External id": 503775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858200.906, "dur": 2.699, + "args": { + "External id": 503776,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141858363.667, "dur": 162.567, + "args": { + "External id": 503777,"Record function id": 0, "Sequence number": 5516079, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141858365.972, "dur": 154.396, + "args": { + "External id": 503778,"Sequence number": 5516079, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 155, "pid": 2338711, "tid": 2379440, "ts": 6274141858365.972, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274141858382.922, "dur": 37.769, + "args": { + "External id": 503779,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858385.276, "dur": 3.065, + "args": { + "External id": 503780,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141858389.478, "dur": 30.690, + "args": { + "External id": 503781,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274141858428.002, "dur": 6.260, + "args": { + "External id": 503782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858430.337, "dur": 3.664, + "args": { + "External id": 503783,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858532.215, "dur": 18.896, + "args": { + "External id": 503784,"Record function id": 0, "Sequence number": 5516078, "Fwd thread id": 1, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858533.585, "dur": 14.600, + "args": { + "External id": 503785,"Sequence number": 5516078, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1512 + } + }, + { + "ph": "f", "id": 156, "pid": 2338711, "tid": 2379440, "ts": 6274141858533.585, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141858536.217, "dur": 11.674, + "args": { + "External id": 503786,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141858539.919, "dur": 7.748, + "args": { + "External id": 503787,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858557.282, "dur": 6.470, + "args": { + "External id": 503788,"Record function id": 0, "Sequence number": 5516077, "Fwd thread id": 1, "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858558.300, "dur": 3.725, + "args": { + "External id": 503789,"Sequence number": 5516077, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1516 + } + }, + { + "ph": "f", "id": 157, "pid": 2338711, "tid": 2379440, "ts": 6274141858558.300, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141858559.914, "dur": 1.987, + "args": { + "External id": 503790,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141858560.933, "dur": 0.815, + "args": { + "External id": 503791,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858567.245, "dur": 6.167, + "args": { + "External id": 503792,"Record function id": 0, "Sequence number": 5516076, "Fwd thread id": 1, "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858568.085, "dur": 3.477, + "args": { + "External id": 503793,"Sequence number": 5516076, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1520 + } + }, + { + "ph": "f", "id": 158, "pid": 2338711, "tid": 2379440, "ts": 6274141858568.085, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141858569.505, "dur": 1.931, + "args": { + "External id": 503794,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141858570.565, "dur": 0.789, + "args": { + "External id": 503795,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858576.862, "dur": 27.925, + "args": { + "External id": 503796,"Record function id": 0, "Sequence number": 5516075, "Fwd thread id": 1, "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858577.983, "dur": 5.566, + "args": { + "External id": 503797,"Sequence number": 5516075, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1524 + } + }, + { + "ph": "f", "id": 159, "pid": 2338711, "tid": 2379440, "ts": 6274141858577.983, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141858579.161, "dur": 4.259, + "args": { + "External id": 503798,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141858582.264, "dur": 1.059, + "args": { + "External id": 503799,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858612.849, "dur": 216.822, + "args": { + "External id": 503800,"Record function id": 0, "Sequence number": 5516074, "Fwd thread id": 1, "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858614.744, "dur": 205.359, + "args": { + "External id": 503801,"Sequence number": 5516074, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1528 + } + }, + { + "ph": "f", "id": 160, "pid": 2338711, "tid": 2379440, "ts": 6274141858614.744, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141858619.476, "dur": 9.669, + "args": { + "External id": 503802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141858622.194, "dur": 5.855, + "args": { + "External id": 503803,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858625.699, "dur": 1.923, + "args": { + "External id": 503804,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141858633.589, "dur": 88.481, + "args": { + "External id": 503805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141858723.807, "dur": 5.560, + "args": { + "External id": 503806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141858724.917, "dur": 3.561, + "args": { + "External id": 503807,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858727.238, "dur": 1.071, + "args": { + "External id": 503808,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141858758.344, "dur": 7.367, + "args": { + "External id": 503809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141858759.547, "dur": 5.684, + "args": { + "External id": 503810,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858764.770, "dur": 0.360, + "args": { + "External id": 503811,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141858766.570, "dur": 52.571, + "args": { + "External id": 503812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858835.901, "dur": 8.110, + "args": { + "External id": 503813,"Record function id": 0, "Sequence number": 5516073, "Fwd thread id": 1, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858836.767, "dur": 5.155, + "args": { + "External id": 503814,"Sequence number": 5516073, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1541 + } + }, + { + "ph": "f", "id": 161, "pid": 2338711, "tid": 2379440, "ts": 6274141858836.767, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141858838.371, "dur": 3.410, + "args": { + "External id": 503815,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141858839.863, "dur": 1.740, + "args": { + "External id": 503816,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858847.473, "dur": 10.081, + "args": { + "External id": 503817,"Record function id": 0, "Sequence number": 5516072, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858848.327, "dur": 6.808, + "args": { + "External id": 503818,"Sequence number": 5516072, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 162, "pid": 2338711, "tid": 2379440, "ts": 6274141858848.327, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141858849.127, "dur": 5.810, + "args": { + "External id": 503819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141858852.275, "dur": 2.172, + "args": { + "External id": 503820,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858853.823, "dur": 0.548, + "args": { + "External id": 503821,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141858863.693, "dur": 10.578, + "args": { + "External id": 503822,"Record function id": 0, "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141858866.192, "dur": 7.322, + "args": { + "External id": 503823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141858868.910, "dur": 4.164, + "args": { + "External id": 503824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141858870.221, "dur": 2.745, + "args": { + "External id": 503825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858878.165, "dur": 6.012, + "args": { + "External id": 503826,"Record function id": 0, "Sequence number": 5516071, "Fwd thread id": 1, "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858879.590, "dur": 2.748, + "args": { + "External id": 503827,"Sequence number": 5516071, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1554 + } + }, + { + "ph": "f", "id": 163, "pid": 2338711, "tid": 2379440, "ts": 6274141858879.590, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141858880.611, "dur": 1.557, + "args": { + "External id": 503828,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141858881.330, "dur": 0.690, + "args": { + "External id": 503829,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858889.734, "dur": 99.995, + "args": { + "External id": 503830,"Record function id": 0, "Sequence number": 5516070, "Fwd thread id": 1, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858890.491, "dur": 91.551, + "args": { + "External id": 503831,"Sequence number": 5516070, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1558 + } + }, + { + "ph": "f", "id": 164, "pid": 2338711, "tid": 2379440, "ts": 6274141858890.491, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141858892.667, "dur": 6.123, + "args": { + "External id": 503832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141858893.128, "dur": 5.211, + "args": { + "External id": 503833,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858897.645, "dur": 0.580, + "args": { + "External id": 503834,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141858899.421, "dur": 27.578, + "args": { + "External id": 503835,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141858930.504, "dur": 5.817, + "args": { + "External id": 503836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141858931.003, "dur": 4.653, + "args": { + "External id": 503837,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858933.021, "dur": 2.476, + "args": { + "External id": 503838,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141858937.562, "dur": 5.494, + "args": { + "External id": 503839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141858938.365, "dur": 4.200, + "args": { + "External id": 503840,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141858939.860, "dur": 2.632, + "args": { + "External id": 503841,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141858945.752, "dur": 35.520, + "args": { + "External id": 503842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858996.668, "dur": 33.180, + "args": { + "External id": 503843,"Record function id": 0, "Sequence number": 5516069, "Fwd thread id": 1, "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141858997.401, "dur": 4.666, + "args": { + "External id": 503844,"Sequence number": 5516069, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1571 + } + }, + { + "ph": "f", "id": 165, "pid": 2338711, "tid": 2379440, "ts": 6274141858997.401, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141858999.607, "dur": 2.327, + "args": { + "External id": 503845,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141859000.581, "dur": 1.249, + "args": { + "External id": 503846,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274141859005.421, "dur": 21.650, + "args": { + "External id": 503847,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859033.472, "dur": 7.375, + "args": { + "External id": 503848,"Record function id": 0, "Sequence number": 5516068, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859034.253, "dur": 4.798, + "args": { + "External id": 503849,"Sequence number": 5516068, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 166, "pid": 2338711, "tid": 2379440, "ts": 6274141859034.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141859035.086, "dur": 3.746, + "args": { + "External id": 503850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141859035.964, "dur": 2.382, + "args": { + "External id": 503851,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859037.753, "dur": 0.453, + "args": { + "External id": 503852,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141859044.940, "dur": 7.556, + "args": { + "External id": 503853,"Record function id": 0, "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141859046.699, "dur": 5.208, + "args": { + "External id": 503854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141859047.700, "dur": 3.780, + "args": { + "External id": 503855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141859050.156, "dur": 1.198, + "args": { + "External id": 503856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859092.366, "dur": 9.681, + "args": { + "External id": 503857,"Record function id": 0, "Sequence number": 5516067, "Fwd thread id": 1, "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859094.670, "dur": 5.020, + "args": { + "External id": 503858,"Sequence number": 5516067, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1585 + } + }, + { + "ph": "f", "id": 167, "pid": 2338711, "tid": 2379440, "ts": 6274141859094.670, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141859096.375, "dur": 3.134, + "args": { + "External id": 503859,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141859097.399, "dur": 1.855, + "args": { + "External id": 503860,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859106.616, "dur": 111.150, + "args": { + "External id": 503861,"Record function id": 0, "Sequence number": 5516066, "Fwd thread id": 1, "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859107.346, "dur": 102.534, + "args": { + "External id": 503862,"Sequence number": 5516066, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1589 + } + }, + { + "ph": "f", "id": 168, "pid": 2338711, "tid": 2379440, "ts": 6274141859107.346, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141859109.470, "dur": 5.884, + "args": { + "External id": 503863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141859112.697, "dur": 2.176, + "args": { + "External id": 503864,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859114.259, "dur": 0.487, + "args": { + "External id": 503865,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141859115.999, "dur": 45.960, + "args": { + "External id": 503866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141859163.143, "dur": 4.171, + "args": { + "External id": 503867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141859163.761, "dur": 2.849, + "args": { + "External id": 503868,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859165.500, "dur": 0.991, + "args": { + "External id": 503869,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141859170.948, "dur": 3.327, + "args": { + "External id": 503870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141859172.014, "dur": 1.793, + "args": { + "External id": 503871,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859173.389, "dur": 0.352, + "args": { + "External id": 503872,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141859174.743, "dur": 34.439, + "args": { + "External id": 503873,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859222.686, "dur": 30.329, + "args": { + "External id": 503874,"Record function id": 0, "Sequence number": 5516065, "Fwd thread id": 1, "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859223.405, "dur": 6.846, + "args": { + "External id": 503875,"Sequence number": 5516065, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1602 + } + }, + { + "ph": "f", "id": 169, "pid": 2338711, "tid": 2379440, "ts": 6274141859223.405, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141859227.706, "dur": 2.410, + "args": { + "External id": 503876,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141859228.834, "dur": 1.128, + "args": { + "External id": 503877,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141859232.734, "dur": 17.550, + "args": { + "External id": 503878,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859256.439, "dur": 7.248, + "args": { + "External id": 503879,"Record function id": 0, "Sequence number": 5516064, "Fwd thread id": 1, "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274141859257.154, "dur": 4.741, + "args": { + "External id": 503880,"Sequence number": 5516064, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1607 + } + }, + { + "ph": "f", "id": 170, "pid": 2338711, "tid": 2379440, "ts": 6274141859257.154, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274141859257.790, "dur": 3.890, + "args": { + "External id": 503881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274141859258.681, "dur": 2.498, + "args": { + "External id": 503882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859260.530, "dur": 0.492, + "args": { + "External id": 503883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141859267.677, "dur": 5.613, + "args": { + "External id": 503884,"Record function id": 0, "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141859269.312, "dur": 3.483, + "args": { + "External id": 503885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141859270.480, "dur": 1.940, + "args": { + "External id": 503886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141859270.986, "dur": 1.338, + "args": { + "External id": 503887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141859277.894, "dur": 412.832, + "args": { + "External id": 503888,"Record function id": 0, "Sequence number": 5516063, "Fwd thread id": 1, "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141859279.253, "dur": 373.295, + "args": { + "External id": 503889,"Sequence number": 5516063, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1616 + } + }, + { + "ph": "f", "id": 171, "pid": 2338711, "tid": 2379440, "ts": 6274141859279.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141859320.435, "dur": 2.298, + "args": { + "External id": 503890,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141859321.392, "dur": 1.240, + "args": { + "External id": 503891,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141859337.694, "dur": 6.454, + "args": { + "External id": 503892,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141859354.415, "dur": 2.193, + "args": { + "External id": 503893,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141859508.977, "dur": 1.647, + "args": { + "External id": 503894,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141859514.915, "dur": 35.455, + "args": { + "External id": 503895,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859526.875, "dur": 1.027, + "args": { + "External id": 503896,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141859556.196, "dur": 59.295, + "args": { + "External id": 503897,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141859560.690, "dur": 54.543, + "args": { + "External id": 503898,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859565.655, "dur": 4.157, + "args": { + "External id": 503899,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141859571.366, "dur": 42.645, + "args": { + "External id": 503900,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274141859623.387, "dur": 4.535, + "args": { + "External id": 503901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141859625.577, "dur": 2.116, + "args": { + "External id": 503902,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141859636.938, "dur": 6.017, + "args": { + "External id": 503903,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141859641.261, "dur": 1.438, + "args": { + "External id": 503904,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274141859667.846, "dur": 17.745, + "args": { + "External id": 503905,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141859705.229, "dur": 10.433, + "args": { + "External id": 503906,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141859708.145, "dur": 6.514, + "args": { + "External id": 503907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141859710.368, "dur": 3.165, + "args": { + "External id": 503908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141859711.455, "dur": 1.980, + "args": { + "External id": 503909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141859720.167, "dur": 2910.821, + "args": { + "External id": 503910,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274141859755.474, "dur": 977.736, + "args": { + "External id": 503911,"Record function id": 0, "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338711, "tid": 2379440, + "ts": 6274141859781.305, "dur": 942.826, + "args": { + "External id": 503912,"Record function id": 0, "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274141859796.592, "dur": 911.256, + "args": { + "External id": 503913,"Record function id": 0, "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141859875.939, "dur": 7.143, + "args": { + "External id": 503914,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274141859898.643, "dur": 35.240, + "args": { + "External id": 503915,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859903.161, "dur": 1.817, + "args": { + "External id": 503916,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859909.728, "dur": 2.994, + "args": { + "External id": 503917,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859914.007, "dur": 0.380, + "args": { + "External id": 503918,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859915.916, "dur": 1.875, + "args": { + "External id": 503919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859919.484, "dur": 0.516, + "args": { + "External id": 503920,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859921.738, "dur": 0.442, + "args": { + "External id": 503921,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859925.307, "dur": 0.472, + "args": { + "External id": 503922,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859926.958, "dur": 0.363, + "args": { + "External id": 503923,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141859928.733, "dur": 0.355, + "args": { + "External id": 503924,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141859944.793, "dur": 42.314, + "args": { + "External id": 503925,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274141860027.120, "dur": 176.049, + "args": { + "External id": 503926,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141860038.451, "dur": 7.112, + "args": { + "External id": 503927,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274141860050.560, "dur": 53.300, + "args": { + "External id": 503928,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274141860054.780, "dur": 48.645, + "args": { + "External id": 503929,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860098.908, "dur": 2.622, + "args": { + "External id": 503930,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274141860112.964, "dur": 31.084, + "args": { + "External id": 503931,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860115.958, "dur": 0.407, + "args": { + "External id": 503932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860118.142, "dur": 0.381, + "args": { + "External id": 503933,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860120.161, "dur": 0.574, + "args": { + "External id": 503934,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860123.748, "dur": 0.415, + "args": { + "External id": 503935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860125.679, "dur": 0.285, + "args": { + "External id": 503936,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860127.261, "dur": 2.177, + "args": { + "External id": 503937,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860131.255, "dur": 2.840, + "args": { + "External id": 503938,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860135.476, "dur": 0.551, + "args": { + "External id": 503939,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141860138.947, "dur": 0.326, + "args": { + "External id": 503940,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141860156.788, "dur": 37.157, + "args": { + "External id": 503941,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274141860263.296, "dur": 304.911, + "args": { + "External id": 503942,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274141860297.758, "dur": 265.393, + "args": { + "External id": 503943,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1670, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274141860308.713, "dur": 249.238, + "args": { + "External id": 503944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274141860613.209, "dur": 4.063, + "args": { + "External id": 503945,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1672, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141860741.964, "dur": 1862.544, + "args": { + "External id": 503946,"Sequence number": 5516062, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1673 + } + }, + { + "ph": "f", "id": 172, "pid": 2338711, "tid": 2379440, "ts": 6274141860741.964, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141860864.508, "dur": 120.748, + "args": { + "External id": 503947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274141861030.522, "dur": 82.100, + "args": { + "External id": 503948,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274141861136.497, "dur": 59.102, + "args": { + "External id": 503949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141861206.499, "dur": 33.424, + "args": { + "External id": 503950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141861245.794, "dur": 34.397, + "args": { + "External id": 503951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141861289.113, "dur": 29.006, + "args": { + "External id": 503952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141861325.864, "dur": 29.926, + "args": { + "External id": 503953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274141861386.291, "dur": 27.719, + "args": { + "External id": 503954,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274141861435.718, "dur": 30.630, + "args": { + "External id": 503955,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274141861487.939, "dur": 22.732, + "args": { + "External id": 503956,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274141861529.173, "dur": 16.854, + "args": { + "External id": 503957,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141861555.301, "dur": 54.459, + "args": { + "External id": 503958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141861616.887, "dur": 47.393, + "args": { + "External id": 503959,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274141861697.808, "dur": 254.552, + "args": { + "External id": 503960,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141861782.958, "dur": 8.262, + "args": { + "External id": 503961,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141861793.087, "dur": 3.608, + "args": { + "External id": 503962,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141861797.899, "dur": 1.796, + "args": { + "External id": 503963,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141861800.875, "dur": 1.465, + "args": { + "External id": 503964,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141861848.405, "dur": 6.592, + "args": { + "External id": 503965,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141861850.526, "dur": 4.249, + "args": { + "External id": 503966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141861856.913, "dur": 31.360, + "args": { + "External id": 503967,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141861862.837, "dur": 1.844, + "args": { + "External id": 503968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274141861889.702, "dur": 4.398, + "args": { + "External id": 503969,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141861893.188, "dur": 0.828, + "args": { + "External id": 503970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274141861895.082, "dur": 13.864, + "args": { + "External id": 503971,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141861897.086, "dur": 0.549, + "args": { + "External id": 503972,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274141861991.496, "dur": 31.288, + "args": { + "External id": 503973,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274141862041.179, "dur": 56.921, + "args": { + "External id": 503974,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141862109.525, "dur": 53.581, + "args": { + "External id": 503975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141862172.202, "dur": 42.055, + "args": { + "External id": 503976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141862223.320, "dur": 21.116, + "args": { + "External id": 503977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141862249.706, "dur": 32.212, + "args": { + "External id": 503978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141862288.925, "dur": 28.675, + "args": { + "External id": 503979,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274141862326.469, "dur": 31.295, + "args": { + "External id": 503980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274141862382.338, "dur": 25.384, + "args": { + "External id": 503981,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274141862427.849, "dur": 24.242, + "args": { + "External id": 503982,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274141862467.998, "dur": 17.079, + "args": { + "External id": 503983,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274141862504.507, "dur": 13.923, + "args": { + "External id": 503984,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274141862534.555, "dur": 17.578, + "args": { + "External id": 503985,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862658.757, "dur": 23.707, + "args": { + "External id": 503986,"Record function id": 0, "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862663.705, "dur": 17.463, + "args": { + "External id": 503987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862669.482, "dur": 10.811, + "args": { + "External id": 503988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862671.420, "dur": 8.728, + "args": { + "External id": 503989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862686.729, "dur": 5.572, + "args": { + "External id": 503990,"Record function id": 0, "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862688.554, "dur": 3.137, + "args": { + "External id": 503991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862689.314, "dur": 1.943, + "args": { + "External id": 503992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862690.059, "dur": 1.111, + "args": { + "External id": 503993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862695.658, "dur": 4.225, + "args": { + "External id": 503994,"Record function id": 0, "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862696.954, "dur": 2.487, + "args": { + "External id": 503995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862697.654, "dur": 1.375, + "args": { + "External id": 503996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862698.356, "dur": 0.545, + "args": { + "External id": 503997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862703.189, "dur": 4.173, + "args": { + "External id": 503998,"Record function id": 0, "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862704.521, "dur": 2.438, + "args": { + "External id": 503999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862705.574, "dur": 1.008, + "args": { + "External id": 504000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862705.890, "dur": 0.598, + "args": { + "External id": 504001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862710.449, "dur": 14.018, + "args": { + "External id": 504002,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862711.542, "dur": 12.480, + "args": { + "External id": 504003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862722.329, "dur": 1.131, + "args": { + "External id": 504004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862722.645, "dur": 0.740, + "args": { + "External id": 504005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862727.411, "dur": 6.656, + "args": { + "External id": 504006,"Record function id": 0, "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862728.764, "dur": 4.901, + "args": { + "External id": 504007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862729.245, "dur": 4.046, + "args": { + "External id": 504008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862732.420, "dur": 0.762, + "args": { + "External id": 504009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862738.651, "dur": 3.805, + "args": { + "External id": 504010,"Record function id": 0, "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862740.016, "dur": 2.031, + "args": { + "External id": 504011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862740.480, "dur": 1.129, + "args": { + "External id": 504012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862740.950, "dur": 0.590, + "args": { + "External id": 504013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862745.490, "dur": 6.700, + "args": { + "External id": 504014,"Record function id": 0, "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862746.625, "dur": 5.146, + "args": { + "External id": 504015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862747.109, "dur": 4.113, + "args": { + "External id": 504016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862747.772, "dur": 3.380, + "args": { + "External id": 504017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862755.335, "dur": 4.303, + "args": { + "External id": 504018,"Record function id": 0, "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274141862756.573, "dur": 2.683, + "args": { + "External id": 504019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862757.332, "dur": 1.528, + "args": { + "External id": 504020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274141862757.871, "dur": 0.910, + "args": { + "External id": 504021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141862763.305, "dur": 294489.343, + "args": { + "External id": 504022,"Record function id": 0, "Sequence number": 5516061, "Fwd thread id": 1, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274141862764.435, "dur": 294477.811, + "args": { + "External id": 504023,"Sequence number": 5516061, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1750 + } + }, + { + "ph": "f", "id": 173, "pid": 2338711, "tid": 2379440, "ts": 6274141862764.435, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274141862801.161, "dur": 43.969, + "args": { + "External id": 504024,"Record function id": 0, "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274141862852.886, "dur": 71.231, + "args": { + "External id": 504025,"Record function id": 0, "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274141862930.094, "dur": 294302.819, + "args": { + "External id": 504026,"Record function id": 0, "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141862989.463, "dur": 9.104, + "args": { + "External id": 504027,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274141863008.690, "dur": 4.522, + "args": { + "External id": 504028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274141863034.029, "dur": 293250.913, + "args": { + "External id": 504029,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274141863050.017, "dur": 293219.179, + "args": { + "External id": 504030,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274141863240.486, "dur": 6.934, + "args": { + "External id": 504031,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274141863276.973, "dur": 292938.833, + "args": { + "External id": 504032,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274141863280.175, "dur": 292934.234, + "args": { + "External id": 504033,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274141863285.041, "dur": 17.323, + "args": { + "External id": 504034,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274141863304.526, "dur": 292902.369, + "args": { + "External id": 504035,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142156408.443, "dur": 18.581, + "args": { + "External id": 504036,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142156416.259, "dur": 10.354, + "args": { + "External id": 504037,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142156468.538, "dur": 365.484, + "args": { + "External id": 504038,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142156503.944, "dur": 324.130, + "args": { + "External id": 504039,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1766, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142156516.712, "dur": 304.696, + "args": { + "External id": 504040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142156862.326, "dur": 2.727, + "args": { + "External id": 504041,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1768, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142156936.361, "dur": 7.832, + "args": { + "External id": 504042,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157002.840, "dur": 1.479, + "args": { + "External id": 504043,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157022.233, "dur": 1.125, + "args": { + "External id": 504044,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157038.987, "dur": 1.046, + "args": { + "External id": 504045,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157093.302, "dur": 1.886, + "args": { + "External id": 504046,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157114.035, "dur": 0.972, + "args": { + "External id": 504047,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157127.703, "dur": 0.981, + "args": { + "External id": 504048,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157144.375, "dur": 2.269, + "args": { + "External id": 504049,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157158.001, "dur": 0.970, + "args": { + "External id": 504050,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142157270.309, "dur": 3243.426, + "args": { + "External id": 504051,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274142157293.133, "dur": 1218.720, + "args": { + "External id": 504052,"Record function id": 0, "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274142157309.743, "dur": 409.426, + "args": { + "External id": 504053,"Record function id": 0, "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157403.422, "dur": 4.622, + "args": { + "External id": 504054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157412.305, "dur": 0.907, + "args": { + "External id": 504055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157415.523, "dur": 0.814, + "args": { + "External id": 504056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157418.157, "dur": 1.233, + "args": { + "External id": 504057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157420.991, "dur": 1.101, + "args": { + "External id": 504058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157426.100, "dur": 0.769, + "args": { + "External id": 504059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157428.533, "dur": 2.862, + "args": { + "External id": 504060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157432.859, "dur": 3.469, + "args": { + "External id": 504061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157438.187, "dur": 1.037, + "args": { + "External id": 504062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142157442.871, "dur": 0.875, + "args": { + "External id": 504063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142157462.678, "dur": 215.382, + "args": { + "External id": 504064,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142157480.204, "dur": 191.255, + "args": { + "External id": 504065,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142157507.799, "dur": 15.354, + "args": { + "External id": 504066,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142157527.112, "dur": 99.301, + "args": { + "External id": 504067,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142157529.889, "dur": 95.952, + "args": { + "External id": 504068,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157535.815, "dur": 6.674, + "args": { + "External id": 504069,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142157544.195, "dur": 79.973, + "args": { + "External id": 504070,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338711, "tid": 2379440, + "ts": 6274142157818.000, "dur": 685.892, + "args": { + "External id": 504071,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274142157836.988, "dur": 653.973, + "args": { + "External id": 504072,"Record function id": 0, "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142157898.664, "dur": 6.813, + "args": { + "External id": 504073,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142157921.936, "dur": 39.006, + "args": { + "External id": 504074,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157927.610, "dur": 2.374, + "args": { + "External id": 504075,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157932.184, "dur": 2.307, + "args": { + "External id": 504076,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157936.411, "dur": 2.673, + "args": { + "External id": 504077,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157941.034, "dur": 0.518, + "args": { + "External id": 504078,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157945.185, "dur": 0.348, + "args": { + "External id": 504079,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157947.021, "dur": 0.637, + "args": { + "External id": 504080,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157949.314, "dur": 0.488, + "args": { + "External id": 504081,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157953.018, "dur": 0.524, + "args": { + "External id": 504082,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142157955.089, "dur": 0.486, + "args": { + "External id": 504083,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142157972.167, "dur": 45.982, + "args": { + "External id": 504084,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142158050.777, "dur": 182.110, + "args": { + "External id": 504085,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142158102.763, "dur": 5.919, + "args": { + "External id": 504086,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142158114.774, "dur": 20.699, + "args": { + "External id": 504087,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142158125.727, "dur": 9.243, + "args": { + "External id": 504088,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158130.450, "dur": 2.893, + "args": { + "External id": 504089,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142158143.553, "dur": 30.923, + "args": { + "External id": 504090,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158146.476, "dur": 0.517, + "args": { + "External id": 504091,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158150.859, "dur": 0.565, + "args": { + "External id": 504092,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158152.943, "dur": 0.301, + "args": { + "External id": 504093,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158155.140, "dur": 1.776, + "args": { + "External id": 504094,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158158.306, "dur": 0.342, + "args": { + "External id": 504095,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158160.216, "dur": 0.372, + "args": { + "External id": 504096,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158163.611, "dur": 0.540, + "args": { + "External id": 504097,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158165.498, "dur": 2.530, + "args": { + "External id": 504098,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142158169.642, "dur": 0.502, + "args": { + "External id": 504099,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142158189.030, "dur": 35.036, + "args": { + "External id": 504100,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142158281.998, "dur": 131.018, + "args": { + "External id": 504101,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142158317.323, "dur": 92.209, + "args": { + "External id": 504102,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1829, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142158327.655, "dur": 77.656, + "args": { + "External id": 504103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142158433.407, "dur": 1.887, + "args": { + "External id": 504104,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1831, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142158519.948, "dur": 1970.893, + "args": { + "External id": 504105,"Sequence number": 5516060, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1832 + } + }, + { + "ph": "f", "id": 174, "pid": 2338711, "tid": 2379440, "ts": 6274142158519.948, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142158671.407, "dur": 125.994, + "args": { + "External id": 504106,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142158846.049, "dur": 41.256, + "args": { + "External id": 504107,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142158907.737, "dur": 51.111, + "args": { + "External id": 504108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142158969.269, "dur": 32.530, + "args": { + "External id": 504109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142159009.723, "dur": 33.429, + "args": { + "External id": 504110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142159051.185, "dur": 71.225, + "args": { + "External id": 504111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142159136.402, "dur": 32.197, + "args": { + "External id": 504112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142159198.930, "dur": 28.355, + "args": { + "External id": 504113,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142159251.412, "dur": 33.144, + "args": { + "External id": 504114,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142159307.155, "dur": 21.461, + "args": { + "External id": 504115,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142159346.961, "dur": 19.572, + "args": { + "External id": 504116,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142159377.062, "dur": 37.653, + "args": { + "External id": 504117,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142159418.350, "dur": 32.576, + "args": { + "External id": 504118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142159481.624, "dur": 306.348, + "args": { + "External id": 504119,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142159568.183, "dur": 7.080, + "args": { + "External id": 504120,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142159577.206, "dur": 2.280, + "args": { + "External id": 504121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142159581.118, "dur": 23.554, + "args": { + "External id": 504122,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142159607.967, "dur": 5.896, + "args": { + "External id": 504123,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142159673.738, "dur": 6.047, + "args": { + "External id": 504124,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142159676.202, "dur": 3.355, + "args": { + "External id": 504125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142159682.036, "dur": 33.906, + "args": { + "External id": 504126,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142159688.214, "dur": 2.739, + "args": { + "External id": 504127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142159717.916, "dur": 1.843, + "args": { + "External id": 504128,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142159719.169, "dur": 0.481, + "args": { + "External id": 504129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142159720.729, "dur": 16.404, + "args": { + "External id": 504130,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142159724.842, "dur": 0.899, + "args": { + "External id": 504131,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142159831.838, "dur": 31.784, + "args": { + "External id": 504132,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142159882.701, "dur": 18.042, + "args": { + "External id": 504133,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142159909.506, "dur": 55.592, + "args": { + "External id": 504134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142159971.871, "dur": 42.578, + "args": { + "External id": 504135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142160025.401, "dur": 21.019, + "args": { + "External id": 504136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142160112.173, "dur": 50.970, + "args": { + "External id": 504137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142160175.142, "dur": 33.569, + "args": { + "External id": 504138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142160215.859, "dur": 32.288, + "args": { + "External id": 504139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142160276.152, "dur": 34.461, + "args": { + "External id": 504140,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142160328.160, "dur": 26.225, + "args": { + "External id": 504141,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142160372.452, "dur": 19.002, + "args": { + "External id": 504142,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142160408.954, "dur": 15.568, + "args": { + "External id": 504143,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142160440.181, "dur": 19.021, + "args": { + "External id": 504144,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160538.604, "dur": 16.106, + "args": { + "External id": 504145,"Record function id": 0, "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160542.247, "dur": 11.422, + "args": { + "External id": 504146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160547.010, "dur": 5.651, + "args": { + "External id": 504147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160548.448, "dur": 4.093, + "args": { + "External id": 504148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160558.580, "dur": 7.549, + "args": { + "External id": 504149,"Record function id": 0, "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160560.078, "dur": 5.611, + "args": { + "External id": 504150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160560.916, "dur": 4.345, + "args": { + "External id": 504151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160561.716, "dur": 3.400, + "args": { + "External id": 504152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160569.513, "dur": 4.803, + "args": { + "External id": 504153,"Record function id": 0, "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160570.909, "dur": 2.943, + "args": { + "External id": 504154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160571.649, "dur": 1.765, + "args": { + "External id": 504155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160572.463, "dur": 0.842, + "args": { + "External id": 504156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160577.601, "dur": 4.551, + "args": { + "External id": 504157,"Record function id": 0, "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160578.932, "dur": 2.806, + "args": { + "External id": 504158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160579.936, "dur": 1.348, + "args": { + "External id": 504159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160580.496, "dur": 0.681, + "args": { + "External id": 504160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160606.001, "dur": 10.131, + "args": { + "External id": 504161,"Record function id": 0, "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160609.210, "dur": 5.939, + "args": { + "External id": 504162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160610.892, "dur": 2.899, + "args": { + "External id": 504163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160611.767, "dur": 1.749, + "args": { + "External id": 504164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160621.658, "dur": 6.794, + "args": { + "External id": 504165,"Record function id": 0, "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160623.695, "dur": 4.066, + "args": { + "External id": 504166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160624.934, "dur": 2.212, + "args": { + "External id": 504167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160625.957, "dur": 1.035, + "args": { + "External id": 504168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160633.312, "dur": 17.026, + "args": { + "External id": 504169,"Record function id": 0, "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160635.665, "dur": 13.978, + "args": { + "External id": 504170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160636.597, "dur": 12.455, + "args": { + "External id": 504171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160647.443, "dur": 1.432, + "args": { + "External id": 504172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160655.103, "dur": 6.540, + "args": { + "External id": 504173,"Record function id": 0, "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160657.321, "dur": 3.642, + "args": { + "External id": 504174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160658.577, "dur": 1.773, + "args": { + "External id": 504175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160659.214, "dur": 0.989, + "args": { + "External id": 504176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160666.204, "dur": 8.610, + "args": { + "External id": 504177,"Record function id": 0, "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142160668.250, "dur": 5.890, + "args": { + "External id": 504178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160669.462, "dur": 3.850, + "args": { + "External id": 504179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142160670.112, "dur": 3.125, + "args": { + "External id": 504180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142160679.087, "dur": 64062.157, + "args": { + "External id": 504181,"Record function id": 0, "Sequence number": 5516059, "Fwd thread id": 1, "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142160680.368, "dur": 64051.269, + "args": { + "External id": 504182,"Sequence number": 5516059, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1909 + } + }, + { + "ph": "f", "id": 175, "pid": 2338711, "tid": 2379440, "ts": 6274142160680.368, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274142160719.086, "dur": 44.778, + "args": { + "External id": 504183,"Record function id": 0, "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274142160772.226, "dur": 69.718, + "args": { + "External id": 504184,"Record function id": 0, "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274142160848.363, "dur": 63875.107, + "args": { + "External id": 504185,"Record function id": 0, "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142160948.282, "dur": 9.214, + "args": { + "External id": 504186,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142160967.755, "dur": 5.313, + "args": { + "External id": 504187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142160990.177, "dur": 62762.473, + "args": { + "External id": 504188,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142161005.332, "dur": 62731.979, + "args": { + "External id": 504189,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142161196.254, "dur": 20.540, + "args": { + "External id": 504190,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142161245.569, "dur": 62441.612, + "args": { + "External id": 504191,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142161248.563, "dur": 62436.968, + "args": { + "External id": 504192,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142161253.637, "dur": 14.883, + "args": { + "External id": 504193,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142161270.614, "dur": 62408.297, + "args": { + "External id": 504194,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142223869.511, "dur": 13.685, + "args": { + "External id": 504195,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142223873.991, "dur": 8.738, + "args": { + "External id": 504196,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142223917.929, "dur": 428.018, + "args": { + "External id": 504197,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142223954.928, "dur": 385.053, + "args": { + "External id": 504198,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1925, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142223968.625, "dur": 364.015, + "args": { + "External id": 504199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142224374.272, "dur": 2.700, + "args": { + "External id": 504200,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1927, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224446.707, "dur": 6.974, + "args": { + "External id": 504201,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224504.016, "dur": 3.632, + "args": { + "External id": 504202,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224524.931, "dur": 1.030, + "args": { + "External id": 504203,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224539.212, "dur": 0.869, + "args": { + "External id": 504204,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224556.703, "dur": 0.827, + "args": { + "External id": 504205,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224569.988, "dur": 3.621, + "args": { + "External id": 504206,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224605.499, "dur": 2.098, + "args": { + "External id": 504207,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224628.979, "dur": 3.100, + "args": { + "External id": 504208,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142224647.167, "dur": 1.482, + "args": { + "External id": 504209,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142224759.134, "dur": 3208.588, + "args": { + "External id": 504210,"Record function id": 0, "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274142224781.217, "dur": 1206.722, + "args": { + "External id": 504211,"Record function id": 0, "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274142224801.150, "dur": 421.197, + "args": { + "External id": 504212,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224896.484, "dur": 6.868, + "args": { + "External id": 504213,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224907.039, "dur": 1.010, + "args": { + "External id": 504214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224910.000, "dur": 0.967, + "args": { + "External id": 504215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224912.645, "dur": 0.954, + "args": { + "External id": 504216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224917.311, "dur": 0.691, + "args": { + "External id": 504217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224919.305, "dur": 0.672, + "args": { + "External id": 504218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224921.485, "dur": 2.258, + "args": { + "External id": 504219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224925.125, "dur": 0.855, + "args": { + "External id": 504220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224929.552, "dur": 2.767, + "args": { + "External id": 504221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142224933.831, "dur": 0.629, + "args": { + "External id": 504222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142224967.281, "dur": 214.647, + "args": { + "External id": 504223,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142224985.500, "dur": 190.547, + "args": { + "External id": 504224,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142225008.780, "dur": 15.510, + "args": { + "External id": 504225,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142225028.355, "dur": 116.286, + "args": { + "External id": 504226,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142225033.752, "dur": 110.521, + "args": { + "External id": 504227,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225038.500, "dur": 7.052, + "args": { + "External id": 504228,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142225047.284, "dur": 95.761, + "args": { + "External id": 504229,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338711, "tid": 2379440, + "ts": 6274142225317.067, "dur": 662.818, + "args": { + "External id": 504230,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274142225336.720, "dur": 630.286, + "args": { + "External id": 504231,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142225395.019, "dur": 6.925, + "args": { + "External id": 504232,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142225418.453, "dur": 40.504, + "args": { + "External id": 504233,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225424.049, "dur": 2.013, + "args": { + "External id": 504234,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225428.584, "dur": 2.067, + "args": { + "External id": 504235,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225432.017, "dur": 0.625, + "args": { + "External id": 504236,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225433.920, "dur": 2.241, + "args": { + "External id": 504237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225443.750, "dur": 0.285, + "args": { + "External id": 504238,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225445.456, "dur": 0.287, + "args": { + "External id": 504239,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225447.510, "dur": 0.351, + "args": { + "External id": 504240,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225451.000, "dur": 0.442, + "args": { + "External id": 504241,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225452.977, "dur": 0.349, + "args": { + "External id": 504242,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142225470.350, "dur": 44.035, + "args": { + "External id": 504243,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142225547.283, "dur": 158.488, + "args": { + "External id": 504244,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142225557.670, "dur": 3.480, + "args": { + "External id": 504245,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142225566.440, "dur": 10.966, + "args": { + "External id": 504246,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142225571.226, "dur": 5.767, + "args": { + "External id": 504247,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225575.092, "dur": 0.582, + "args": { + "External id": 504248,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142225603.307, "dur": 42.399, + "args": { + "External id": 504249,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225607.590, "dur": 3.405, + "args": { + "External id": 504250,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225615.197, "dur": 0.538, + "args": { + "External id": 504251,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225617.987, "dur": 0.471, + "args": { + "External id": 504252,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225620.726, "dur": 2.023, + "args": { + "External id": 504253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225624.997, "dur": 0.489, + "args": { + "External id": 504254,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225627.576, "dur": 0.553, + "args": { + "External id": 504255,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225631.470, "dur": 0.727, + "args": { + "External id": 504256,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225634.441, "dur": 0.814, + "args": { + "External id": 504257,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142225637.574, "dur": 2.600, + "args": { + "External id": 504258,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142225660.997, "dur": 35.138, + "args": { + "External id": 504259,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142225756.560, "dur": 133.155, + "args": { + "External id": 504260,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142225792.379, "dur": 93.432, + "args": { + "External id": 504261,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1988, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142225803.343, "dur": 78.289, + "args": { + "External id": 504262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142225908.954, "dur": 1.912, + "args": { + "External id": 504263,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1990, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142225995.349, "dur": 1949.279, + "args": { + "External id": 504264,"Sequence number": 5516058, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1991 + } + }, + { + "ph": "f", "id": 176, "pid": 2338711, "tid": 2379440, "ts": 6274142225995.349, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226153.069, "dur": 116.553, + "args": { + "External id": 504265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142226315.033, "dur": 39.623, + "args": { + "External id": 504266,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226374.250, "dur": 50.642, + "args": { + "External id": 504267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226435.507, "dur": 32.557, + "args": { + "External id": 504268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226475.259, "dur": 33.531, + "args": { + "External id": 504269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226516.730, "dur": 29.699, + "args": { + "External id": 504270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226555.424, "dur": 48.917, + "args": { + "External id": 504271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142226642.876, "dur": 35.178, + "args": { + "External id": 504272,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142226702.094, "dur": 34.582, + "args": { + "External id": 504273,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142226763.544, "dur": 21.842, + "args": { + "External id": 504274,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142226797.853, "dur": 17.788, + "args": { + "External id": 504275,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226825.393, "dur": 44.858, + "args": { + "External id": 504276,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142226873.794, "dur": 34.298, + "args": { + "External id": 504277,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142226939.124, "dur": 331.400, + "args": { + "External id": 504278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142227026.215, "dur": 8.804, + "args": { + "External id": 504279,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142227037.415, "dur": 3.069, + "args": { + "External id": 504280,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142227041.676, "dur": 2.139, + "args": { + "External id": 504281,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142227045.158, "dur": 3.885, + "args": { + "External id": 504282,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142227155.446, "dur": 5.785, + "args": { + "External id": 504283,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142227157.703, "dur": 3.173, + "args": { + "External id": 504284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142227163.254, "dur": 36.357, + "args": { + "External id": 504285,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142227169.424, "dur": 3.731, + "args": { + "External id": 504286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142227201.228, "dur": 1.624, + "args": { + "External id": 504287,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142227202.322, "dur": 0.456, + "args": { + "External id": 504288,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142227204.256, "dur": 16.130, + "args": { + "External id": 504289,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142227206.336, "dur": 0.568, + "args": { + "External id": 504290,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142227311.148, "dur": 30.138, + "args": { + "External id": 504291,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142227361.283, "dur": 18.921, + "args": { + "External id": 504292,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142227387.471, "dur": 51.919, + "args": { + "External id": 504293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142227446.099, "dur": 59.423, + "args": { + "External id": 504294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142227523.866, "dur": 26.813, + "args": { + "External id": 504295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142227556.332, "dur": 57.661, + "args": { + "External id": 504296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142227627.888, "dur": 43.597, + "args": { + "External id": 504297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142227678.298, "dur": 32.100, + "args": { + "External id": 504298,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142227737.432, "dur": 28.869, + "args": { + "External id": 504299,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142227784.093, "dur": 25.738, + "args": { + "External id": 504300,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142227826.807, "dur": 18.497, + "args": { + "External id": 504301,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142227863.911, "dur": 15.790, + "args": { + "External id": 504302,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142227895.207, "dur": 17.996, + "args": { + "External id": 504303,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142227991.664, "dur": 17.046, + "args": { + "External id": 504304,"Record function id": 0, "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142227995.889, "dur": 11.855, + "args": { + "External id": 504305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228000.593, "dur": 6.249, + "args": { + "External id": 504306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228002.431, "dur": 4.289, + "args": { + "External id": 504307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228012.857, "dur": 4.823, + "args": { + "External id": 504308,"Record function id": 0, "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228014.343, "dur": 2.928, + "args": { + "External id": 504309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228015.136, "dur": 1.677, + "args": { + "External id": 504310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228015.760, "dur": 0.935, + "args": { + "External id": 504311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228020.880, "dur": 6.160, + "args": { + "External id": 504312,"Record function id": 0, "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228022.074, "dur": 4.535, + "args": { + "External id": 504313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228022.728, "dur": 3.508, + "args": { + "External id": 504314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228023.473, "dur": 2.648, + "args": { + "External id": 504315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228030.183, "dur": 4.454, + "args": { + "External id": 504316,"Record function id": 0, "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228031.839, "dur": 2.319, + "args": { + "External id": 504317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228032.371, "dur": 1.399, + "args": { + "External id": 504318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228032.677, "dur": 1.020, + "args": { + "External id": 504319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228037.727, "dur": 75.202, + "args": { + "External id": 504320,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228106.413, "dur": 5.297, + "args": { + "External id": 504321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228108.666, "dur": 2.094, + "args": { + "External id": 504322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228109.167, "dur": 1.338, + "args": { + "External id": 504323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228118.070, "dur": 13.442, + "args": { + "External id": 504324,"Record function id": 0, "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228126.250, "dur": 4.762, + "args": { + "External id": 504325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228126.964, "dur": 3.649, + "args": { + "External id": 504326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228129.884, "dur": 0.620, + "args": { + "External id": 504327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228135.920, "dur": 3.907, + "args": { + "External id": 504328,"Record function id": 0, "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228137.220, "dur": 2.216, + "args": { + "External id": 504329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228137.927, "dur": 1.100, + "args": { + "External id": 504330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228138.300, "dur": 0.624, + "args": { + "External id": 504331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228142.884, "dur": 3.543, + "args": { + "External id": 504332,"Record function id": 0, "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228144.030, "dur": 1.975, + "args": { + "External id": 504333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228144.486, "dur": 1.112, + "args": { + "External id": 504334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228144.866, "dur": 0.655, + "args": { + "External id": 504335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228149.443, "dur": 3.877, + "args": { + "External id": 504336,"Record function id": 0, "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142228150.822, "dur": 2.070, + "args": { + "External id": 504337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228151.409, "dur": 1.099, + "args": { + "External id": 504338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142228151.848, "dur": 0.587, + "args": { + "External id": 504339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142228157.131, "dur": 62651.732, + "args": { + "External id": 504340,"Record function id": 0, "Sequence number": 5516057, "Fwd thread id": 1, "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142228161.078, "dur": 62638.181, + "args": { + "External id": 504341,"Sequence number": 5516057, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2068 + } + }, + { + "ph": "f", "id": 177, "pid": 2338711, "tid": 2379440, "ts": 6274142228161.078, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274142228199.620, "dur": 44.595, + "args": { + "External id": 504342,"Record function id": 0, "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274142228252.480, "dur": 71.440, + "args": { + "External id": 504343,"Record function id": 0, "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274142228329.589, "dur": 62459.999, + "args": { + "External id": 504344,"Record function id": 0, "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142228426.722, "dur": 8.391, + "args": { + "External id": 504345,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142228445.552, "dur": 6.941, + "args": { + "External id": 504346,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142228471.433, "dur": 61286.715, + "args": { + "External id": 504347,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142228486.010, "dur": 61256.750, + "args": { + "External id": 504348,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142228607.886, "dur": 27.312, + "args": { + "External id": 504349,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142228665.452, "dur": 61025.828, + "args": { + "External id": 504350,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142228669.674, "dur": 61020.094, + "args": { + "External id": 504351,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142228675.012, "dur": 16.025, + "args": { + "External id": 504352,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142228692.993, "dur": 60990.203, + "args": { + "External id": 504353,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142289873.825, "dur": 14.372, + "args": { + "External id": 504354,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142289878.056, "dur": 9.736, + "args": { + "External id": 504355,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142289922.673, "dur": 470.868, + "args": { + "External id": 504356,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142289957.454, "dur": 429.878, + "args": { + "External id": 504357,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2084, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142289971.134, "dur": 409.404, + "args": { + "External id": 504358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142290423.094, "dur": 2.591, + "args": { + "External id": 504359,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2086, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290493.308, "dur": 7.564, + "args": { + "External id": 504360,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290549.438, "dur": 1.616, + "args": { + "External id": 504361,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290568.096, "dur": 3.839, + "args": { + "External id": 504362,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290612.521, "dur": 2.082, + "args": { + "External id": 504363,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290640.705, "dur": 1.685, + "args": { + "External id": 504364,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290657.623, "dur": 1.029, + "args": { + "External id": 504365,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290674.898, "dur": 3.656, + "args": { + "External id": 504366,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290693.483, "dur": 2.689, + "args": { + "External id": 504367,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142290713.936, "dur": 1.333, + "args": { + "External id": 504368,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142290827.067, "dur": 3184.291, + "args": { + "External id": 504369,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274142290847.686, "dur": 1191.421, + "args": { + "External id": 504370,"Record function id": 0, "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274142290865.804, "dur": 409.576, + "args": { + "External id": 504371,"Record function id": 0, "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290957.166, "dur": 4.516, + "args": { + "External id": 504372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290965.600, "dur": 0.907, + "args": { + "External id": 504373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290968.501, "dur": 3.067, + "args": { + "External id": 504374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290973.584, "dur": 0.797, + "args": { + "External id": 504375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290975.951, "dur": 0.971, + "args": { + "External id": 504376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290978.728, "dur": 1.133, + "args": { + "External id": 504377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290981.543, "dur": 2.411, + "args": { + "External id": 504378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290987.546, "dur": 1.951, + "args": { + "External id": 504379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290991.273, "dur": 0.767, + "args": { + "External id": 504380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142290993.768, "dur": 1.017, + "args": { + "External id": 504381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142291012.948, "dur": 225.126, + "args": { + "External id": 504382,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142291033.318, "dur": 198.816, + "args": { + "External id": 504383,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142291103.589, "dur": 17.906, + "args": { + "External id": 504384,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142291125.995, "dur": 74.937, + "args": { + "External id": 504385,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142291130.682, "dur": 69.895, + "args": { + "External id": 504386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291135.056, "dur": 7.744, + "args": { + "External id": 504387,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142291144.489, "dur": 55.407, + "args": { + "External id": 504388,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338711, "tid": 2379440, + "ts": 6274142291364.983, "dur": 665.210, + "args": { + "External id": 504389,"Record function id": 0, "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274142291384.155, "dur": 633.225, + "args": { + "External id": 504390,"Record function id": 0, "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142291441.791, "dur": 5.095, + "args": { + "External id": 504391,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142291462.719, "dur": 35.245, + "args": { + "External id": 504392,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291468.738, "dur": 1.985, + "args": { + "External id": 504393,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291474.445, "dur": 0.456, + "args": { + "External id": 504394,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291476.679, "dur": 0.564, + "args": { + "External id": 504395,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291478.478, "dur": 0.333, + "args": { + "External id": 504396,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291481.616, "dur": 0.407, + "args": { + "External id": 504397,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291483.392, "dur": 3.063, + "args": { + "External id": 504398,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291487.822, "dur": 1.361, + "args": { + "External id": 504399,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291490.505, "dur": 0.341, + "args": { + "External id": 504400,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291492.438, "dur": 0.268, + "args": { + "External id": 504401,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142291508.222, "dur": 43.865, + "args": { + "External id": 504402,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142291583.902, "dur": 175.167, + "args": { + "External id": 504403,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142291618.946, "dur": 6.152, + "args": { + "External id": 504404,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142291631.924, "dur": 13.683, + "args": { + "External id": 504405,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142291637.266, "dur": 7.772, + "args": { + "External id": 504406,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291642.216, "dur": 0.678, + "args": { + "External id": 504407,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142291655.453, "dur": 41.834, + "args": { + "External id": 504408,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291658.842, "dur": 1.898, + "args": { + "External id": 504409,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291663.011, "dur": 0.664, + "args": { + "External id": 504410,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291665.826, "dur": 3.394, + "args": { + "External id": 504411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291672.851, "dur": 0.709, + "args": { + "External id": 504412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291675.973, "dur": 0.937, + "args": { + "External id": 504413,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291679.151, "dur": 0.838, + "args": { + "External id": 504414,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291683.891, "dur": 0.719, + "args": { + "External id": 504415,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291686.694, "dur": 0.681, + "args": { + "External id": 504416,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142291689.646, "dur": 2.084, + "args": { + "External id": 504417,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142291713.760, "dur": 37.007, + "args": { + "External id": 504418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142291810.283, "dur": 127.666, + "args": { + "External id": 504419,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142291842.298, "dur": 91.980, + "args": { + "External id": 504420,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2147, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142291854.323, "dur": 75.224, + "args": { + "External id": 504421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142291958.080, "dur": 1.849, + "args": { + "External id": 504422,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2149, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142292047.433, "dur": 1941.085, + "args": { + "External id": 504423,"Sequence number": 5516056, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2150 + } + }, + { + "ph": "f", "id": 178, "pid": 2338711, "tid": 2379440, "ts": 6274142292047.433, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292205.403, "dur": 119.703, + "args": { + "External id": 504424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142292369.297, "dur": 38.363, + "args": { + "External id": 504425,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292424.560, "dur": 49.905, + "args": { + "External id": 504426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292497.839, "dur": 33.928, + "args": { + "External id": 504427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292538.433, "dur": 35.501, + "args": { + "External id": 504428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292580.501, "dur": 59.135, + "args": { + "External id": 504429,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292654.221, "dur": 41.612, + "args": { + "External id": 504430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142292732.492, "dur": 29.029, + "args": { + "External id": 504431,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142292782.121, "dur": 30.219, + "args": { + "External id": 504432,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142292835.670, "dur": 22.686, + "args": { + "External id": 504433,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142292871.699, "dur": 17.677, + "args": { + "External id": 504434,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292897.808, "dur": 40.287, + "args": { + "External id": 504435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142292941.674, "dur": 33.759, + "args": { + "External id": 504436,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142293007.339, "dur": 311.787, + "args": { + "External id": 504437,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142293132.742, "dur": 7.803, + "args": { + "External id": 504438,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142293142.905, "dur": 3.882, + "args": { + "External id": 504439,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142293148.424, "dur": 2.798, + "args": { + "External id": 504440,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142293152.344, "dur": 1.707, + "args": { + "External id": 504441,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142293207.399, "dur": 7.729, + "args": { + "External id": 504442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142293211.999, "dur": 2.905, + "args": { + "External id": 504443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142293217.270, "dur": 36.382, + "args": { + "External id": 504444,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142293223.248, "dur": 3.597, + "args": { + "External id": 504445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142293255.379, "dur": 2.017, + "args": { + "External id": 504446,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142293256.673, "dur": 0.640, + "args": { + "External id": 504447,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142293258.759, "dur": 15.956, + "args": { + "External id": 504448,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142293260.807, "dur": 0.505, + "args": { + "External id": 504449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142293356.233, "dur": 28.816, + "args": { + "External id": 504450,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142293403.379, "dur": 19.029, + "args": { + "External id": 504451,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142293430.063, "dur": 47.269, + "args": { + "External id": 504452,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142293483.464, "dur": 40.467, + "args": { + "External id": 504453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142293534.371, "dur": 21.990, + "args": { + "External id": 504454,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142293561.811, "dur": 54.464, + "args": { + "External id": 504455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142293631.303, "dur": 61.653, + "args": { + "External id": 504456,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142293710.095, "dur": 39.094, + "args": { + "External id": 504457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142293776.406, "dur": 31.770, + "args": { + "External id": 504458,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142293826.879, "dur": 28.317, + "args": { + "External id": 504459,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142293870.931, "dur": 19.024, + "args": { + "External id": 504460,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142293909.270, "dur": 15.342, + "args": { + "External id": 504461,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142293939.039, "dur": 17.709, + "args": { + "External id": 504462,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294035.076, "dur": 15.225, + "args": { + "External id": 504463,"Record function id": 0, "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294038.391, "dur": 10.986, + "args": { + "External id": 504464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294042.969, "dur": 5.411, + "args": { + "External id": 504465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294044.289, "dur": 3.983, + "args": { + "External id": 504466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294054.344, "dur": 48.516, + "args": { + "External id": 504467,"Record function id": 0, "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294095.463, "dur": 6.094, + "args": { + "External id": 504468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294097.362, "dur": 3.021, + "args": { + "External id": 504469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294098.482, "dur": 1.620, + "args": { + "External id": 504470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294108.353, "dur": 7.443, + "args": { + "External id": 504471,"Record function id": 0, "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294110.085, "dur": 5.201, + "args": { + "External id": 504472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294110.778, "dur": 4.068, + "args": { + "External id": 504473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294111.972, "dur": 2.755, + "args": { + "External id": 504474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294119.071, "dur": 4.114, + "args": { + "External id": 504475,"Record function id": 0, "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294120.449, "dur": 2.333, + "args": { + "External id": 504476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294121.153, "dur": 1.215, + "args": { + "External id": 504477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294121.707, "dur": 0.580, + "args": { + "External id": 504478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294126.214, "dur": 4.339, + "args": { + "External id": 504479,"Record function id": 0, "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294127.765, "dur": 2.391, + "args": { + "External id": 504480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294128.560, "dur": 1.168, + "args": { + "External id": 504481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294128.998, "dur": 0.659, + "args": { + "External id": 504482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294133.706, "dur": 3.804, + "args": { + "External id": 504483,"Record function id": 0, "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294134.960, "dur": 2.156, + "args": { + "External id": 504484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294135.613, "dur": 1.058, + "args": { + "External id": 504485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294136.001, "dur": 0.604, + "args": { + "External id": 504486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294140.637, "dur": 3.777, + "args": { + "External id": 504487,"Record function id": 0, "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294141.912, "dur": 2.118, + "args": { + "External id": 504488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294142.398, "dur": 1.215, + "args": { + "External id": 504489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294142.925, "dur": 0.605, + "args": { + "External id": 504490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294147.412, "dur": 6.184, + "args": { + "External id": 504491,"Record function id": 0, "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294148.760, "dur": 4.452, + "args": { + "External id": 504492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294149.434, "dur": 3.360, + "args": { + "External id": 504493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294152.087, "dur": 0.621, + "args": { + "External id": 504494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294157.965, "dur": 4.222, + "args": { + "External id": 504495,"Record function id": 0, "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142294159.306, "dur": 2.456, + "args": { + "External id": 504496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294159.977, "dur": 1.346, + "args": { + "External id": 504497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142294160.510, "dur": 0.729, + "args": { + "External id": 504498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142294166.156, "dur": 63827.951, + "args": { + "External id": 504499,"Record function id": 0, "Sequence number": 5516055, "Fwd thread id": 1, "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142294167.792, "dur": 63817.819, + "args": { + "External id": 504500,"Sequence number": 5516055, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2227 + } + }, + { + "ph": "f", "id": 179, "pid": 2338711, "tid": 2379440, "ts": 6274142294167.792, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274142294201.711, "dur": 45.257, + "args": { + "External id": 504501,"Record function id": 0, "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274142294255.253, "dur": 65.415, + "args": { + "External id": 504502,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274142294326.312, "dur": 63650.252, + "args": { + "External id": 504503,"Record function id": 0, "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142294423.342, "dur": 7.983, + "args": { + "External id": 504504,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142294442.256, "dur": 6.833, + "args": { + "External id": 504505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142294467.365, "dur": 62570.248, + "args": { + "External id": 504506,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142294483.877, "dur": 62539.554, + "args": { + "External id": 504507,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142294581.315, "dur": 44.318, + "args": { + "External id": 504508,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142294653.641, "dur": 62316.042, + "args": { + "External id": 504509,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142294657.794, "dur": 62310.708, + "args": { + "External id": 504510,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142294665.081, "dur": 15.286, + "args": { + "External id": 504511,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142294683.134, "dur": 62278.997, + "args": { + "External id": 504512,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142357187.209, "dur": 14.295, + "args": { + "External id": 504513,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142357191.459, "dur": 9.513, + "args": { + "External id": 504514,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142357231.951, "dur": 415.911, + "args": { + "External id": 504515,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142357266.939, "dur": 374.573, + "args": { + "External id": 504516,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2243, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142357280.839, "dur": 353.124, + "args": { + "External id": 504517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142357679.794, "dur": 2.895, + "args": { + "External id": 504518,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2245, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357750.633, "dur": 7.156, + "args": { + "External id": 504519,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357806.200, "dur": 1.288, + "args": { + "External id": 504520,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357824.445, "dur": 3.473, + "args": { + "External id": 504521,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357839.703, "dur": 1.093, + "args": { + "External id": 504522,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357854.298, "dur": 1.037, + "args": { + "External id": 504523,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357867.270, "dur": 0.706, + "args": { + "External id": 504524,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357879.931, "dur": 3.467, + "args": { + "External id": 504525,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357894.713, "dur": 2.660, + "args": { + "External id": 504526,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142357910.285, "dur": 0.643, + "args": { + "External id": 504527,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142358008.871, "dur": 3188.491, + "args": { + "External id": 504528,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274142358029.420, "dur": 1205.227, + "args": { + "External id": 504529,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274142358045.069, "dur": 403.436, + "args": { + "External id": 504530,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358177.432, "dur": 5.545, + "args": { + "External id": 504531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358187.009, "dur": 1.107, + "args": { + "External id": 504532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358190.253, "dur": 3.270, + "args": { + "External id": 504533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358195.276, "dur": 0.907, + "args": { + "External id": 504534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358197.906, "dur": 0.673, + "args": { + "External id": 504535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358199.986, "dur": 1.047, + "args": { + "External id": 504536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358202.805, "dur": 2.294, + "args": { + "External id": 504537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358208.666, "dur": 0.822, + "args": { + "External id": 504538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358211.154, "dur": 1.002, + "args": { + "External id": 504539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142358213.729, "dur": 1.060, + "args": { + "External id": 504540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142358233.467, "dur": 180.330, + "args": { + "External id": 504541,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142358250.095, "dur": 158.769, + "args": { + "External id": 504542,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142358282.612, "dur": 16.684, + "args": { + "External id": 504543,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142358303.028, "dur": 75.013, + "args": { + "External id": 504544,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142358305.656, "dur": 72.014, + "args": { + "External id": 504545,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358311.454, "dur": 6.218, + "args": { + "External id": 504546,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142358319.411, "dur": 57.637, + "args": { + "External id": 504547,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338711, "tid": 2379440, + "ts": 6274142358532.263, "dur": 693.747, + "args": { + "External id": 504548,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274142358550.066, "dur": 661.287, + "args": { + "External id": 504549,"Record function id": 0, "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142358630.493, "dur": 8.595, + "args": { + "External id": 504550,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142358658.559, "dur": 44.529, + "args": { + "External id": 504551,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358665.000, "dur": 2.183, + "args": { + "External id": 504552,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358670.250, "dur": 2.085, + "args": { + "External id": 504553,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358674.341, "dur": 0.983, + "args": { + "External id": 504554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358677.639, "dur": 0.704, + "args": { + "External id": 504555,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358682.094, "dur": 0.769, + "args": { + "External id": 504556,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358684.988, "dur": 3.214, + "args": { + "External id": 504557,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358690.029, "dur": 0.661, + "args": { + "External id": 504558,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358693.842, "dur": 0.440, + "args": { + "External id": 504559,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358696.115, "dur": 0.584, + "args": { + "External id": 504560,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142358714.047, "dur": 49.583, + "args": { + "External id": 504561,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142358799.388, "dur": 114.404, + "args": { + "External id": 504562,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142358810.285, "dur": 3.963, + "args": { + "External id": 504563,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142358819.480, "dur": 10.834, + "args": { + "External id": 504564,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142358824.174, "dur": 5.717, + "args": { + "External id": 504565,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358827.929, "dur": 0.652, + "args": { + "External id": 504566,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142358838.024, "dur": 28.655, + "args": { + "External id": 504567,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358840.205, "dur": 0.236, + "args": { + "External id": 504568,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358842.350, "dur": 0.563, + "args": { + "External id": 504569,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358844.908, "dur": 2.491, + "args": { + "External id": 504570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358849.009, "dur": 0.419, + "args": { + "External id": 504571,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358851.215, "dur": 0.366, + "args": { + "External id": 504572,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358853.800, "dur": 0.369, + "args": { + "External id": 504573,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358855.794, "dur": 1.545, + "args": { + "External id": 504574,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358859.730, "dur": 0.338, + "args": { + "External id": 504575,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142358861.836, "dur": 0.403, + "args": { + "External id": 504576,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142358876.919, "dur": 28.887, + "args": { + "External id": 504577,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142358965.014, "dur": 164.420, + "args": { + "External id": 504578,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142358991.086, "dur": 133.709, + "args": { + "External id": 504579,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2306, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142359004.373, "dur": 115.204, + "args": { + "External id": 504580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142359149.721, "dur": 1.812, + "args": { + "External id": 504581,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2308, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142359243.321, "dur": 1931.378, + "args": { + "External id": 504582,"Sequence number": 5516054, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2309 + } + }, + { + "ph": "f", "id": 180, "pid": 2338711, "tid": 2379440, "ts": 6274142359243.321, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142359361.262, "dur": 112.906, + "args": { + "External id": 504583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142359515.106, "dur": 44.940, + "args": { + "External id": 504584,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142359576.785, "dur": 93.226, + "args": { + "External id": 504585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142359689.129, "dur": 36.793, + "args": { + "External id": 504586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142359732.150, "dur": 35.069, + "args": { + "External id": 504587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142359773.762, "dur": 29.082, + "args": { + "External id": 504588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142359809.326, "dur": 31.671, + "args": { + "External id": 504589,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142359871.608, "dur": 27.914, + "args": { + "External id": 504590,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142359922.679, "dur": 31.263, + "args": { + "External id": 504591,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142359977.162, "dur": 19.720, + "args": { + "External id": 504592,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142360011.170, "dur": 14.736, + "args": { + "External id": 504593,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360035.289, "dur": 80.021, + "args": { + "External id": 504594,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360121.839, "dur": 37.636, + "args": { + "External id": 504595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142360195.317, "dur": 261.241, + "args": { + "External id": 504596,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142360278.615, "dur": 6.988, + "args": { + "External id": 504597,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142360287.825, "dur": 2.362, + "args": { + "External id": 504598,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142360291.756, "dur": 3.042, + "args": { + "External id": 504599,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142360296.423, "dur": 1.631, + "args": { + "External id": 504600,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142360346.552, "dur": 5.949, + "args": { + "External id": 504601,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142360348.817, "dur": 3.490, + "args": { + "External id": 504602,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142360355.471, "dur": 34.699, + "args": { + "External id": 504603,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142360361.470, "dur": 4.227, + "args": { + "External id": 504604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142360392.611, "dur": 2.122, + "args": { + "External id": 504605,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142360393.805, "dur": 0.811, + "args": { + "External id": 504606,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142360396.163, "dur": 15.575, + "args": { + "External id": 504607,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142360398.691, "dur": 0.616, + "args": { + "External id": 504608,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142360490.634, "dur": 28.889, + "args": { + "External id": 504609,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142360536.065, "dur": 16.745, + "args": { + "External id": 504610,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360561.763, "dur": 68.476, + "args": { + "External id": 504611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360643.494, "dur": 49.734, + "args": { + "External id": 504612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360707.139, "dur": 22.291, + "args": { + "External id": 504613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360736.591, "dur": 50.845, + "args": { + "External id": 504614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360803.954, "dur": 36.647, + "args": { + "External id": 504615,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142360847.996, "dur": 32.487, + "args": { + "External id": 504616,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142360903.882, "dur": 30.013, + "args": { + "External id": 504617,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142360969.609, "dur": 26.215, + "args": { + "External id": 504618,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142361011.204, "dur": 20.591, + "args": { + "External id": 504619,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142361050.263, "dur": 51.978, + "args": { + "External id": 504620,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142361121.309, "dur": 19.254, + "args": { + "External id": 504621,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361221.399, "dur": 16.458, + "args": { + "External id": 504622,"Record function id": 0, "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361224.879, "dur": 12.004, + "args": { + "External id": 504623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361229.313, "dur": 6.703, + "args": { + "External id": 504624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361231.259, "dur": 4.641, + "args": { + "External id": 504625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361241.569, "dur": 5.408, + "args": { + "External id": 504626,"Record function id": 0, "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361242.922, "dur": 3.637, + "args": { + "External id": 504627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361243.722, "dur": 2.375, + "args": { + "External id": 504628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361244.492, "dur": 1.517, + "args": { + "External id": 504629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361250.144, "dur": 6.387, + "args": { + "External id": 504630,"Record function id": 0, "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361251.351, "dur": 4.793, + "args": { + "External id": 504631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361251.886, "dur": 3.826, + "args": { + "External id": 504632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361252.371, "dur": 3.265, + "args": { + "External id": 504633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361259.642, "dur": 4.758, + "args": { + "External id": 504634,"Record function id": 0, "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361261.112, "dur": 2.882, + "args": { + "External id": 504635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361261.921, "dur": 1.657, + "args": { + "External id": 504636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361262.470, "dur": 1.032, + "args": { + "External id": 504637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361267.552, "dur": 4.434, + "args": { + "External id": 504638,"Record function id": 0, "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361268.842, "dur": 2.722, + "args": { + "External id": 504639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361269.405, "dur": 1.744, + "args": { + "External id": 504640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361270.283, "dur": 0.804, + "args": { + "External id": 504641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361275.084, "dur": 7.360, + "args": { + "External id": 504642,"Record function id": 0, "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361276.285, "dur": 5.741, + "args": { + "External id": 504643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361276.955, "dur": 4.553, + "args": { + "External id": 504644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361280.699, "dur": 0.692, + "args": { + "External id": 504645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361285.598, "dur": 4.267, + "args": { + "External id": 504646,"Record function id": 0, "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361287.089, "dur": 2.364, + "args": { + "External id": 504647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361287.577, "dur": 1.449, + "args": { + "External id": 504648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361288.100, "dur": 0.846, + "args": { + "External id": 504649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361292.940, "dur": 3.740, + "args": { + "External id": 504650,"Record function id": 0, "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361294.151, "dur": 2.125, + "args": { + "External id": 504651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361294.730, "dur": 1.092, + "args": { + "External id": 504652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361295.029, "dur": 0.676, + "args": { + "External id": 504653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361300.923, "dur": 3.899, + "args": { + "External id": 504654,"Record function id": 0, "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142361302.123, "dur": 2.293, + "args": { + "External id": 504655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361302.584, "dur": 1.402, + "args": { + "External id": 504656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142361303.223, "dur": 0.618, + "args": { + "External id": 504657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142361308.734, "dur": 64994.963, + "args": { + "External id": 504658,"Record function id": 0, "Sequence number": 5516053, "Fwd thread id": 1, "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142361309.942, "dur": 64983.332, + "args": { + "External id": 504659,"Sequence number": 5516053, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2386 + } + }, + { + "ph": "f", "id": 181, "pid": 2338711, "tid": 2379440, "ts": 6274142361309.942, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274142361347.854, "dur": 41.812, + "args": { + "External id": 504660,"Record function id": 0, "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274142361397.695, "dur": 64.247, + "args": { + "External id": 504661,"Record function id": 0, "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274142361467.439, "dur": 64817.042, + "args": { + "External id": 504662,"Record function id": 0, "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142361560.913, "dur": 7.622, + "args": { + "External id": 504663,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142361578.853, "dur": 24.864, + "args": { + "External id": 504664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142361627.487, "dur": 63642.641, + "args": { + "External id": 504665,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142361651.271, "dur": 63603.446, + "args": { + "External id": 504666,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142361747.840, "dur": 21.337, + "args": { + "External id": 504667,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142361789.424, "dur": 63407.528, + "args": { + "External id": 504668,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142361792.903, "dur": 63402.814, + "args": { + "External id": 504669,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142361798.380, "dur": 11.552, + "args": { + "External id": 504670,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142361812.130, "dur": 63376.586, + "args": { + "External id": 504671,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142425389.249, "dur": 15.280, + "args": { + "External id": 504672,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142425394.040, "dur": 10.049, + "args": { + "External id": 504673,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142425440.487, "dur": 435.892, + "args": { + "External id": 504674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142425474.485, "dur": 395.410, + "args": { + "External id": 504675,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2402, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142425487.311, "dur": 374.820, + "args": { + "External id": 504676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142425917.405, "dur": 2.824, + "args": { + "External id": 504677,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2404, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142425993.278, "dur": 7.594, + "args": { + "External id": 504678,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426050.973, "dur": 40.354, + "args": { + "External id": 504679,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426116.413, "dur": 3.805, + "args": { + "External id": 504680,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426135.501, "dur": 0.930, + "args": { + "External id": 504681,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426150.498, "dur": 0.948, + "args": { + "External id": 504682,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426164.770, "dur": 0.981, + "args": { + "External id": 504683,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426178.415, "dur": 3.478, + "args": { + "External id": 504684,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426194.591, "dur": 2.549, + "args": { + "External id": 504685,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426208.453, "dur": 1.198, + "args": { + "External id": 504686,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142426324.752, "dur": 3242.683, + "args": { + "External id": 504687,"Record function id": 0, "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274142426346.263, "dur": 1197.095, + "args": { + "External id": 504688,"Record function id": 0, "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274142426363.837, "dur": 400.200, + "args": { + "External id": 504689,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426454.542, "dur": 4.515, + "args": { + "External id": 504690,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426462.633, "dur": 0.977, + "args": { + "External id": 504691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426465.454, "dur": 3.313, + "args": { + "External id": 504692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426470.420, "dur": 0.924, + "args": { + "External id": 504693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426472.911, "dur": 1.239, + "args": { + "External id": 504694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426476.106, "dur": 0.830, + "args": { + "External id": 504695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426478.359, "dur": 2.771, + "args": { + "External id": 504696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426482.680, "dur": 0.791, + "args": { + "External id": 504697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426485.047, "dur": 0.753, + "args": { + "External id": 504698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142426487.438, "dur": 0.933, + "args": { + "External id": 504699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142426507.912, "dur": 214.092, + "args": { + "External id": 504700,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142426524.140, "dur": 191.256, + "args": { + "External id": 504701,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142426550.232, "dur": 16.912, + "args": { + "External id": 504702,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142426571.327, "dur": 106.792, + "args": { + "External id": 504703,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142426574.454, "dur": 103.170, + "args": { + "External id": 504704,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426579.316, "dur": 26.738, + "args": { + "External id": 504705,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142426609.859, "dur": 66.969, + "args": { + "External id": 504706,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338711, "tid": 2379440, + "ts": 6274142426862.676, "dur": 671.992, + "args": { + "External id": 504707,"Record function id": 0, "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274142426882.971, "dur": 638.211, + "args": { + "External id": 504708,"Record function id": 0, "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142426945.057, "dur": 7.220, + "args": { + "External id": 504709,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142426968.272, "dur": 36.389, + "args": { + "External id": 504710,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426974.355, "dur": 1.924, + "args": { + "External id": 504711,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426978.921, "dur": 0.518, + "args": { + "External id": 504712,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426981.541, "dur": 0.544, + "args": { + "External id": 504713,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426983.976, "dur": 0.529, + "args": { + "External id": 504714,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426986.637, "dur": 0.626, + "args": { + "External id": 504715,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426989.591, "dur": 2.857, + "args": { + "External id": 504716,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426994.552, "dur": 0.472, + "args": { + "External id": 504717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426996.622, "dur": 0.364, + "args": { + "External id": 504718,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142426998.564, "dur": 0.347, + "args": { + "External id": 504719,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142427015.390, "dur": 86.990, + "args": { + "External id": 504720,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142427142.639, "dur": 130.136, + "args": { + "External id": 504721,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142427155.530, "dur": 4.866, + "args": { + "External id": 504722,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142427166.128, "dur": 12.893, + "args": { + "External id": 504723,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142427171.152, "dur": 7.421, + "args": { + "External id": 504724,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427175.952, "dur": 0.996, + "args": { + "External id": 504725,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142427186.331, "dur": 30.676, + "args": { + "External id": 504726,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427189.037, "dur": 0.742, + "args": { + "External id": 504727,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427191.709, "dur": 0.461, + "args": { + "External id": 504728,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427194.324, "dur": 2.897, + "args": { + "External id": 504729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427198.931, "dur": 0.461, + "args": { + "External id": 504730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427201.557, "dur": 0.419, + "args": { + "External id": 504731,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427203.822, "dur": 0.534, + "args": { + "External id": 504732,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427206.430, "dur": 0.439, + "args": { + "External id": 504733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427208.786, "dur": 0.474, + "args": { + "External id": 504734,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142427211.330, "dur": 0.419, + "args": { + "External id": 504735,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142427230.466, "dur": 34.395, + "args": { + "External id": 504736,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142427323.580, "dur": 124.220, + "args": { + "External id": 504737,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142427348.075, "dur": 95.539, + "args": { + "External id": 504738,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2465, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142427362.466, "dur": 76.769, + "args": { + "External id": 504739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142427463.159, "dur": 1.678, + "args": { + "External id": 504740,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2467, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142427551.295, "dur": 1993.574, + "args": { + "External id": 504741,"Sequence number": 5516052, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2468 + } + }, + { + "ph": "f", "id": 182, "pid": 2338711, "tid": 2379440, "ts": 6274142427551.295, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142427710.036, "dur": 116.630, + "args": { + "External id": 504742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142427870.298, "dur": 39.738, + "args": { + "External id": 504743,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142427928.684, "dur": 55.048, + "args": { + "External id": 504744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428000.059, "dur": 43.546, + "args": { + "External id": 504745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428095.515, "dur": 44.187, + "args": { + "External id": 504746,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428150.833, "dur": 30.678, + "args": { + "External id": 504747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428188.665, "dur": 31.057, + "args": { + "External id": 504748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142428247.455, "dur": 24.241, + "args": { + "External id": 504749,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142428289.938, "dur": 28.698, + "args": { + "External id": 504750,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142428338.184, "dur": 19.323, + "args": { + "External id": 504751,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142428369.703, "dur": 14.621, + "args": { + "External id": 504752,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428393.349, "dur": 38.012, + "args": { + "External id": 504753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428435.401, "dur": 35.911, + "args": { + "External id": 504754,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142428504.387, "dur": 315.129, + "args": { + "External id": 504755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142428607.752, "dur": 8.989, + "args": { + "External id": 504756,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142428619.976, "dur": 3.877, + "args": { + "External id": 504757,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142428625.931, "dur": 3.331, + "args": { + "External id": 504758,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142428631.592, "dur": 3.011, + "args": { + "External id": 504759,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142428698.846, "dur": 6.061, + "args": { + "External id": 504760,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142428701.476, "dur": 3.209, + "args": { + "External id": 504761,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142428707.154, "dur": 38.224, + "args": { + "External id": 504762,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142428713.404, "dur": 4.163, + "args": { + "External id": 504763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142428747.334, "dur": 1.831, + "args": { + "External id": 504764,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142428748.459, "dur": 0.645, + "args": { + "External id": 504765,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142428750.865, "dur": 17.749, + "args": { + "External id": 504766,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142428753.547, "dur": 0.705, + "args": { + "External id": 504767,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142428857.772, "dur": 30.675, + "args": { + "External id": 504768,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142428905.394, "dur": 17.667, + "args": { + "External id": 504769,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428932.240, "dur": 52.611, + "args": { + "External id": 504770,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142428992.414, "dur": 42.290, + "args": { + "External id": 504771,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142429100.360, "dur": 42.592, + "args": { + "External id": 504772,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142429157.026, "dur": 35.607, + "args": { + "External id": 504773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142429202.335, "dur": 33.487, + "args": { + "External id": 504774,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142429244.040, "dur": 34.720, + "args": { + "External id": 504775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142429301.130, "dur": 29.647, + "args": { + "External id": 504776,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142429386.602, "dur": 25.332, + "args": { + "External id": 504777,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142429429.036, "dur": 19.032, + "args": { + "External id": 504778,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142429466.393, "dur": 14.624, + "args": { + "External id": 504779,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142429495.071, "dur": 16.217, + "args": { + "External id": 504780,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429609.557, "dur": 22.434, + "args": { + "External id": 504781,"Record function id": 0, "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429615.098, "dur": 15.130, + "args": { + "External id": 504782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429620.408, "dur": 7.940, + "args": { + "External id": 504783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429622.843, "dur": 5.103, + "args": { + "External id": 504784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429639.365, "dur": 8.111, + "args": { + "External id": 504785,"Record function id": 0, "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429641.689, "dur": 5.058, + "args": { + "External id": 504786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429642.821, "dur": 3.178, + "args": { + "External id": 504787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429644.272, "dur": 1.551, + "args": { + "External id": 504788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429652.452, "dur": 10.088, + "args": { + "External id": 504789,"Record function id": 0, "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429654.718, "dur": 7.146, + "args": { + "External id": 504790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429655.944, "dur": 5.348, + "args": { + "External id": 504791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429656.639, "dur": 4.494, + "args": { + "External id": 504792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429667.163, "dur": 6.529, + "args": { + "External id": 504793,"Record function id": 0, "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429669.174, "dur": 3.686, + "args": { + "External id": 504794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429670.117, "dur": 2.202, + "args": { + "External id": 504795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429670.839, "dur": 1.327, + "args": { + "External id": 504796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429677.450, "dur": 4.559, + "args": { + "External id": 504797,"Record function id": 0, "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429678.884, "dur": 2.635, + "args": { + "External id": 504798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429679.716, "dur": 1.342, + "args": { + "External id": 504799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429680.077, "dur": 0.903, + "args": { + "External id": 504800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429685.070, "dur": 4.854, + "args": { + "External id": 504801,"Record function id": 0, "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429686.538, "dur": 3.013, + "args": { + "External id": 504802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429686.979, "dur": 2.158, + "args": { + "External id": 504803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429688.191, "dur": 0.824, + "args": { + "External id": 504804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429693.553, "dur": 4.075, + "args": { + "External id": 504805,"Record function id": 0, "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429695.044, "dur": 2.164, + "args": { + "External id": 504806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429695.612, "dur": 1.198, + "args": { + "External id": 504807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429696.088, "dur": 0.614, + "args": { + "External id": 504808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429700.945, "dur": 4.277, + "args": { + "External id": 504809,"Record function id": 0, "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429702.104, "dur": 2.746, + "args": { + "External id": 504810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429702.790, "dur": 1.634, + "args": { + "External id": 504811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429703.456, "dur": 0.865, + "args": { + "External id": 504812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429708.444, "dur": 4.214, + "args": { + "External id": 504813,"Record function id": 0, "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142429709.975, "dur": 2.310, + "args": { + "External id": 504814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429710.634, "dur": 1.207, + "args": { + "External id": 504815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142429711.073, "dur": 0.660, + "args": { + "External id": 504816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142429716.379, "dur": 67844.327, + "args": { + "External id": 504817,"Record function id": 0, "Sequence number": 5516051, "Fwd thread id": 1, "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142429717.668, "dur": 67833.524, + "args": { + "External id": 504818,"Sequence number": 5516051, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2545 + } + }, + { + "ph": "f", "id": 183, "pid": 2338711, "tid": 2379440, "ts": 6274142429717.668, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274142429756.266, "dur": 43.541, + "args": { + "External id": 504819,"Record function id": 0, "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274142429808.429, "dur": 67.595, + "args": { + "External id": 504820,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274142429882.497, "dur": 67659.997, + "args": { + "External id": 504821,"Record function id": 0, "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142429978.301, "dur": 8.534, + "args": { + "External id": 504822,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142429997.329, "dur": 7.086, + "args": { + "External id": 504823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142430021.088, "dur": 66464.361, + "args": { + "External id": 504824,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142430035.869, "dur": 66434.339, + "args": { + "External id": 504825,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142430171.770, "dur": 21.117, + "args": { + "External id": 504826,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142430213.723, "dur": 66203.069, + "args": { + "External id": 504827,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142430217.233, "dur": 66198.122, + "args": { + "External id": 504828,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142430222.416, "dur": 12.220, + "args": { + "External id": 504829,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142430239.296, "dur": 66169.043, + "args": { + "External id": 504830,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142496623.428, "dur": 18.914, + "args": { + "External id": 504831,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142496628.932, "dur": 12.755, + "args": { + "External id": 504832,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142496704.695, "dur": 486.091, + "args": { + "External id": 504833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142496739.754, "dur": 444.510, + "args": { + "External id": 504834,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2561, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142496753.909, "dur": 423.820, + "args": { + "External id": 504835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142497217.804, "dur": 2.657, + "args": { + "External id": 504836,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2563, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497302.667, "dur": 8.079, + "args": { + "External id": 504837,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497360.675, "dur": 1.602, + "args": { + "External id": 504838,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497380.209, "dur": 3.579, + "args": { + "External id": 504839,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497398.061, "dur": 0.985, + "args": { + "External id": 504840,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497412.760, "dur": 1.215, + "args": { + "External id": 504841,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497425.928, "dur": 1.362, + "args": { + "External id": 504842,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497439.466, "dur": 3.662, + "args": { + "External id": 504843,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497455.906, "dur": 2.267, + "args": { + "External id": 504844,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497469.533, "dur": 0.797, + "args": { + "External id": 504845,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142497576.257, "dur": 3248.503, + "args": { + "External id": 504846,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274142497621.095, "dur": 1218.475, + "args": { + "External id": 504847,"Record function id": 0, "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274142497642.677, "dur": 379.466, + "args": { + "External id": 504848,"Record function id": 0, "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497746.164, "dur": 6.100, + "args": { + "External id": 504849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497756.449, "dur": 1.342, + "args": { + "External id": 504850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497759.675, "dur": 3.893, + "args": { + "External id": 504851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497765.301, "dur": 1.381, + "args": { + "External id": 504852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497768.481, "dur": 1.181, + "args": { + "External id": 504853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497771.238, "dur": 0.967, + "args": { + "External id": 504854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497773.793, "dur": 2.440, + "args": { + "External id": 504855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497777.717, "dur": 0.958, + "args": { + "External id": 504856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497780.208, "dur": 0.935, + "args": { + "External id": 504857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142497782.839, "dur": 1.216, + "args": { + "External id": 504858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142497803.198, "dur": 182.567, + "args": { + "External id": 504859,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142497821.235, "dur": 159.269, + "args": { + "External id": 504860,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142497846.902, "dur": 21.869, + "args": { + "External id": 504861,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142497873.091, "dur": 73.625, + "args": { + "External id": 504862,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142497875.911, "dur": 70.488, + "args": { + "External id": 504863,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142497880.911, "dur": 5.808, + "args": { + "External id": 504864,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142497888.593, "dur": 57.199, + "args": { + "External id": 504865,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338711, "tid": 2379440, + "ts": 6274142498166.421, "dur": 663.891, + "args": { + "External id": 504866,"Record function id": 0, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274142498186.565, "dur": 629.500, + "args": { + "External id": 504867,"Record function id": 0, "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142498248.682, "dur": 6.738, + "args": { + "External id": 504868,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142498272.418, "dur": 35.944, + "args": { + "External id": 504869,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498278.563, "dur": 1.896, + "args": { + "External id": 504870,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498282.804, "dur": 0.614, + "args": { + "External id": 504871,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498285.661, "dur": 0.628, + "args": { + "External id": 504872,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498288.418, "dur": 0.441, + "args": { + "External id": 504873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498290.860, "dur": 0.758, + "args": { + "External id": 504874,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498293.709, "dur": 2.333, + "args": { + "External id": 504875,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498298.082, "dur": 0.473, + "args": { + "External id": 504876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498300.612, "dur": 0.406, + "args": { + "External id": 504877,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498302.728, "dur": 0.425, + "args": { + "External id": 504878,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142498318.951, "dur": 50.232, + "args": { + "External id": 504879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142498402.373, "dur": 119.109, + "args": { + "External id": 504880,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142498412.863, "dur": 3.415, + "args": { + "External id": 504881,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142498421.805, "dur": 11.430, + "args": { + "External id": 504882,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142498426.739, "dur": 6.070, + "args": { + "External id": 504883,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498430.860, "dur": 0.563, + "args": { + "External id": 504884,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142498440.574, "dur": 30.133, + "args": { + "External id": 504885,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498443.279, "dur": 0.699, + "args": { + "External id": 504886,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498446.392, "dur": 0.504, + "args": { + "External id": 504887,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498448.971, "dur": 2.368, + "args": { + "External id": 504888,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498453.115, "dur": 0.354, + "args": { + "External id": 504889,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498455.710, "dur": 0.542, + "args": { + "External id": 504890,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498458.348, "dur": 0.682, + "args": { + "External id": 504891,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498460.819, "dur": 0.672, + "args": { + "External id": 504892,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498463.054, "dur": 0.465, + "args": { + "External id": 504893,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142498465.330, "dur": 0.506, + "args": { + "External id": 504894,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142498481.473, "dur": 31.979, + "args": { + "External id": 504895,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142498568.456, "dur": 159.443, + "args": { + "External id": 504896,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142498612.527, "dur": 110.378, + "args": { + "External id": 504897,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142498626.699, "dur": 90.557, + "args": { + "External id": 504898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142498747.332, "dur": 2.440, + "args": { + "External id": 504899,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2626, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142498848.060, "dur": 1954.133, + "args": { + "External id": 504900,"Sequence number": 5516050, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2627 + } + }, + { + "ph": "f", "id": 184, "pid": 2338711, "tid": 2379440, "ts": 6274142498848.060, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142498969.405, "dur": 154.526, + "args": { + "External id": 504901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142499171.756, "dur": 41.507, + "args": { + "External id": 504902,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142499234.180, "dur": 58.574, + "args": { + "External id": 504903,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142499305.747, "dur": 35.217, + "args": { + "External id": 504904,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142499348.990, "dur": 36.236, + "args": { + "External id": 504905,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142499393.202, "dur": 30.569, + "args": { + "External id": 504906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142499431.875, "dur": 30.976, + "args": { + "External id": 504907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142499493.818, "dur": 23.588, + "args": { + "External id": 504908,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142499535.216, "dur": 29.050, + "args": { + "External id": 504909,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142499601.947, "dur": 29.142, + "args": { + "External id": 504910,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142499652.966, "dur": 20.802, + "args": { + "External id": 504911,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142499690.437, "dur": 48.531, + "args": { + "External id": 504912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142499747.645, "dur": 35.380, + "args": { + "External id": 504913,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142499822.082, "dur": 305.976, + "args": { + "External id": 504914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142499908.640, "dur": 7.093, + "args": { + "External id": 504915,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142499918.102, "dur": 2.873, + "args": { + "External id": 504916,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142499922.761, "dur": 2.868, + "args": { + "External id": 504917,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142499927.117, "dur": 2.291, + "args": { + "External id": 504918,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142499979.253, "dur": 5.728, + "args": { + "External id": 504919,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142499981.862, "dur": 2.947, + "args": { + "External id": 504920,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142499987.116, "dur": 33.846, + "args": { + "External id": 504921,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142499992.873, "dur": 3.778, + "args": { + "External id": 504922,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142500023.033, "dur": 2.078, + "args": { + "External id": 504923,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142500024.380, "dur": 0.644, + "args": { + "External id": 504924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142500026.607, "dur": 15.042, + "args": { + "External id": 504925,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142500028.882, "dur": 0.723, + "args": { + "External id": 504926,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142500168.901, "dur": 31.855, + "args": { + "External id": 504927,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142500218.352, "dur": 17.644, + "args": { + "External id": 504928,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142500245.722, "dur": 65.556, + "args": { + "External id": 504929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142500325.672, "dur": 47.549, + "args": { + "External id": 504930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142500385.832, "dur": 25.313, + "args": { + "External id": 504931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142500418.552, "dur": 32.774, + "args": { + "External id": 504932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142500463.207, "dur": 30.176, + "args": { + "External id": 504933,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142500501.046, "dur": 31.759, + "args": { + "External id": 504934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142500552.791, "dur": 24.737, + "args": { + "External id": 504935,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142500616.173, "dur": 33.481, + "args": { + "External id": 504936,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142500670.952, "dur": 23.293, + "args": { + "External id": 504937,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142500717.785, "dur": 16.693, + "args": { + "External id": 504938,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142500748.714, "dur": 17.900, + "args": { + "External id": 504939,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500849.532, "dur": 17.155, + "args": { + "External id": 504940,"Record function id": 0, "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500853.125, "dur": 12.526, + "args": { + "External id": 504941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500857.968, "dur": 6.797, + "args": { + "External id": 504942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500860.076, "dur": 4.554, + "args": { + "External id": 504943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500870.595, "dur": 4.880, + "args": { + "External id": 504944,"Record function id": 0, "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500872.041, "dur": 3.005, + "args": { + "External id": 504945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500872.675, "dur": 1.876, + "args": { + "External id": 504946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500873.366, "dur": 1.070, + "args": { + "External id": 504947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500879.190, "dur": 7.298, + "args": { + "External id": 504948,"Record function id": 0, "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500880.776, "dur": 5.265, + "args": { + "External id": 504949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500881.323, "dur": 4.233, + "args": { + "External id": 504950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500881.665, "dur": 3.799, + "args": { + "External id": 504951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500889.868, "dur": 3.710, + "args": { + "External id": 504952,"Record function id": 0, "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500891.114, "dur": 2.070, + "args": { + "External id": 504953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500891.638, "dur": 1.158, + "args": { + "External id": 504954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500891.932, "dur": 0.779, + "args": { + "External id": 504955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500896.850, "dur": 3.795, + "args": { + "External id": 504956,"Record function id": 0, "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500898.014, "dur": 2.216, + "args": { + "External id": 504957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500898.463, "dur": 1.327, + "args": { + "External id": 504958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500898.956, "dur": 0.757, + "args": { + "External id": 504959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500904.178, "dur": 4.423, + "args": { + "External id": 504960,"Record function id": 0, "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500905.410, "dur": 2.808, + "args": { + "External id": 504961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500906.178, "dur": 1.650, + "args": { + "External id": 504962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500906.969, "dur": 0.746, + "args": { + "External id": 504963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500911.839, "dur": 3.781, + "args": { + "External id": 504964,"Record function id": 0, "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500913.338, "dur": 1.874, + "args": { + "External id": 504965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500913.834, "dur": 0.986, + "args": { + "External id": 504966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500914.121, "dur": 0.623, + "args": { + "External id": 504967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500918.780, "dur": 3.867, + "args": { + "External id": 504968,"Record function id": 0, "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500920.054, "dur": 2.204, + "args": { + "External id": 504969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500920.536, "dur": 1.261, + "args": { + "External id": 504970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500921.150, "dur": 0.568, + "args": { + "External id": 504971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500925.735, "dur": 3.980, + "args": { + "External id": 504972,"Record function id": 0, "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142500927.046, "dur": 2.258, + "args": { + "External id": 504973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500927.582, "dur": 1.308, + "args": { + "External id": 504974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142500928.075, "dur": 0.715, + "args": { + "External id": 504975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142500933.712, "dur": 65228.633, + "args": { + "External id": 504976,"Record function id": 0, "Sequence number": 5516049, "Fwd thread id": 1, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142500934.897, "dur": 65216.704, + "args": { + "External id": 504977,"Sequence number": 5516049, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2704 + } + }, + { + "ph": "f", "id": 185, "pid": 2338711, "tid": 2379440, "ts": 6274142500934.897, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274142500968.457, "dur": 43.305, + "args": { + "External id": 504978,"Record function id": 0, "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274142501019.928, "dur": 108.605, + "args": { + "External id": 504979,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274142501137.144, "dur": 65004.423, + "args": { + "External id": 504980,"Record function id": 0, "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142501236.795, "dur": 7.951, + "args": { + "External id": 504981,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142501256.024, "dur": 7.570, + "args": { + "External id": 504982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142501280.550, "dur": 63828.843, + "args": { + "External id": 504983,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142501295.804, "dur": 63797.908, + "args": { + "External id": 504984,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142501437.241, "dur": 19.825, + "args": { + "External id": 504985,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142501478.214, "dur": 63536.722, + "args": { + "External id": 504986,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142501481.755, "dur": 63531.947, + "args": { + "External id": 504987,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142501487.159, "dur": 10.645, + "args": { + "External id": 504988,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142501500.252, "dur": 63506.620, + "args": { + "External id": 504989,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142565235.724, "dur": 14.726, + "args": { + "External id": 504990,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142565240.396, "dur": 9.554, + "args": { + "External id": 504991,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142565286.626, "dur": 453.440, + "args": { + "External id": 504992,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142565317.751, "dur": 415.755, + "args": { + "External id": 504993,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142565331.350, "dur": 391.859, + "args": { + "External id": 504994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142565766.456, "dur": 2.729, + "args": { + "External id": 504995,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2722, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565842.632, "dur": 7.844, + "args": { + "External id": 504996,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565901.000, "dur": 1.653, + "args": { + "External id": 504997,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565921.382, "dur": 4.011, + "args": { + "External id": 504998,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565939.148, "dur": 1.063, + "args": { + "External id": 504999,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565953.313, "dur": 0.903, + "args": { + "External id": 505000,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565966.383, "dur": 1.055, + "args": { + "External id": 505001,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565983.451, "dur": 3.357, + "args": { + "External id": 505002,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142565998.939, "dur": 2.606, + "args": { + "External id": 505003,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566022.193, "dur": 1.073, + "args": { + "External id": 505004,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142566180.622, "dur": 3251.675, + "args": { + "External id": 505005,"Record function id": 0, "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274142566203.036, "dur": 1224.743, + "args": { + "External id": 505006,"Record function id": 0, "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274142566219.783, "dur": 359.795, + "args": { + "External id": 505007,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566323.351, "dur": 5.500, + "args": { + "External id": 505008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566332.933, "dur": 0.994, + "args": { + "External id": 505009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566335.905, "dur": 3.431, + "args": { + "External id": 505010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566340.918, "dur": 1.006, + "args": { + "External id": 505011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566343.436, "dur": 0.999, + "args": { + "External id": 505012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566346.234, "dur": 0.845, + "args": { + "External id": 505013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566348.838, "dur": 2.629, + "args": { + "External id": 505014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566352.885, "dur": 1.020, + "args": { + "External id": 505015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566355.427, "dur": 0.991, + "args": { + "External id": 505016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142566357.805, "dur": 1.086, + "args": { + "External id": 505017,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142566378.014, "dur": 169.488, + "args": { + "External id": 505018,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142566395.629, "dur": 146.455, + "args": { + "External id": 505019,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142566416.060, "dur": 17.086, + "args": { + "External id": 505020,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142566437.520, "dur": 73.729, + "args": { + "External id": 505021,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142566440.702, "dur": 70.189, + "args": { + "External id": 505022,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566445.701, "dur": 6.218, + "args": { + "External id": 505023,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142566453.973, "dur": 56.209, + "args": { + "External id": 505024,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338711, "tid": 2379440, + "ts": 6274142566711.860, "dur": 706.077, + "args": { + "External id": 505025,"Record function id": 0, "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274142566733.923, "dur": 669.628, + "args": { + "External id": 505026,"Record function id": 0, "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142566797.764, "dur": 7.214, + "args": { + "External id": 505027,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142566821.729, "dur": 38.772, + "args": { + "External id": 505028,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566828.545, "dur": 1.972, + "args": { + "External id": 505029,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566833.545, "dur": 0.758, + "args": { + "External id": 505030,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566836.541, "dur": 0.421, + "args": { + "External id": 505031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566838.824, "dur": 1.030, + "args": { + "External id": 505032,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566841.677, "dur": 0.620, + "args": { + "External id": 505033,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566844.635, "dur": 2.501, + "args": { + "External id": 505034,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566848.891, "dur": 0.448, + "args": { + "External id": 505035,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566851.298, "dur": 0.667, + "args": { + "External id": 505036,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566854.121, "dur": 0.396, + "args": { + "External id": 505037,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142566876.377, "dur": 54.868, + "args": { + "External id": 505038,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142566966.129, "dur": 183.329, + "args": { + "External id": 505039,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142566977.792, "dur": 4.403, + "args": { + "External id": 505040,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142566987.984, "dur": 11.837, + "args": { + "External id": 505041,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142566993.182, "dur": 6.206, + "args": { + "External id": 505042,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142566997.529, "dur": 0.674, + "args": { + "External id": 505043,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142567007.128, "dur": 28.685, + "args": { + "External id": 505044,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567009.807, "dur": 0.809, + "args": { + "External id": 505045,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567012.845, "dur": 0.489, + "args": { + "External id": 505046,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567015.186, "dur": 2.655, + "args": { + "External id": 505047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567020.395, "dur": 0.397, + "args": { + "External id": 505048,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567022.695, "dur": 0.358, + "args": { + "External id": 505049,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567024.849, "dur": 0.540, + "args": { + "External id": 505050,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567027.114, "dur": 0.387, + "args": { + "External id": 505051,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567029.423, "dur": 0.383, + "args": { + "External id": 505052,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142567031.320, "dur": 0.446, + "args": { + "External id": 505053,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142567048.706, "dur": 87.497, + "args": { + "External id": 505054,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142567205.483, "dur": 122.012, + "args": { + "External id": 505055,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142567232.033, "dur": 91.789, + "args": { + "External id": 505056,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2783, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142567243.441, "dur": 75.693, + "args": { + "External id": 505057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142567343.428, "dur": 2.191, + "args": { + "External id": 505058,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2785, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142567435.680, "dur": 1973.977, + "args": { + "External id": 505059,"Sequence number": 5516048, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2786 + } + }, + { + "ph": "f", "id": 186, "pid": 2338711, "tid": 2379440, "ts": 6274142567435.680, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142567556.362, "dur": 152.536, + "args": { + "External id": 505060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142567758.357, "dur": 40.576, + "args": { + "External id": 505061,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142567820.438, "dur": 57.471, + "args": { + "External id": 505062,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142567890.693, "dur": 34.914, + "args": { + "External id": 505063,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142567933.040, "dur": 38.335, + "args": { + "External id": 505064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142567979.457, "dur": 29.497, + "args": { + "External id": 505065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142568017.134, "dur": 34.281, + "args": { + "External id": 505066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142568127.128, "dur": 28.525, + "args": { + "External id": 505067,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142568177.875, "dur": 32.156, + "args": { + "External id": 505068,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142568234.402, "dur": 21.061, + "args": { + "External id": 505069,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142568268.244, "dur": 16.860, + "args": { + "External id": 505070,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142568294.489, "dur": 42.464, + "args": { + "External id": 505071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142568340.836, "dur": 35.277, + "args": { + "External id": 505072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142568411.513, "dur": 317.051, + "args": { + "External id": 505073,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142568497.885, "dur": 7.481, + "args": { + "External id": 505074,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142568507.773, "dur": 2.707, + "args": { + "External id": 505075,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142568512.045, "dur": 2.424, + "args": { + "External id": 505076,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142568515.764, "dur": 2.281, + "args": { + "External id": 505077,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142568568.507, "dur": 5.621, + "args": { + "External id": 505078,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142568570.839, "dur": 3.133, + "args": { + "External id": 505079,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142568576.747, "dur": 64.687, + "args": { + "External id": 505080,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142568583.399, "dur": 23.240, + "args": { + "External id": 505081,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142568645.373, "dur": 3.420, + "args": { + "External id": 505082,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142568647.387, "dur": 1.272, + "args": { + "External id": 505083,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142568650.978, "dur": 22.711, + "args": { + "External id": 505084,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142568655.186, "dur": 0.851, + "args": { + "External id": 505085,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142568769.099, "dur": 31.772, + "args": { + "External id": 505086,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142568818.425, "dur": 19.802, + "args": { + "External id": 505087,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142568848.291, "dur": 71.001, + "args": { + "External id": 505088,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142568932.956, "dur": 43.413, + "args": { + "External id": 505089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142568993.085, "dur": 22.417, + "args": { + "External id": 505090,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142569022.234, "dur": 72.261, + "args": { + "External id": 505091,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142569107.283, "dur": 35.348, + "args": { + "External id": 505092,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142569151.466, "dur": 31.440, + "args": { + "External id": 505093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142569205.997, "dur": 29.908, + "args": { + "External id": 505094,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142569252.377, "dur": 28.508, + "args": { + "External id": 505095,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142569296.259, "dur": 18.524, + "args": { + "External id": 505096,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142569331.797, "dur": 14.461, + "args": { + "External id": 505097,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142569359.264, "dur": 16.826, + "args": { + "External id": 505098,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569456.985, "dur": 17.117, + "args": { + "External id": 505099,"Record function id": 0, "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569461.066, "dur": 12.166, + "args": { + "External id": 505100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569465.770, "dur": 6.349, + "args": { + "External id": 505101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569467.647, "dur": 4.349, + "args": { + "External id": 505102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569478.152, "dur": 5.330, + "args": { + "External id": 505103,"Record function id": 0, "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569479.820, "dur": 3.184, + "args": { + "External id": 505104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569480.366, "dur": 2.184, + "args": { + "External id": 505105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569481.359, "dur": 1.090, + "args": { + "External id": 505106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569486.867, "dur": 6.812, + "args": { + "External id": 505107,"Record function id": 0, "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569488.446, "dur": 4.825, + "args": { + "External id": 505108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569488.946, "dur": 3.898, + "args": { + "External id": 505109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569489.306, "dur": 3.458, + "args": { + "External id": 505110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569497.101, "dur": 4.738, + "args": { + "External id": 505111,"Record function id": 0, "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569498.661, "dur": 2.705, + "args": { + "External id": 505112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569499.128, "dur": 1.845, + "args": { + "External id": 505113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569499.786, "dur": 1.120, + "args": { + "External id": 505114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569505.086, "dur": 4.641, + "args": { + "External id": 505115,"Record function id": 0, "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569506.544, "dur": 2.770, + "args": { + "External id": 505116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569507.435, "dur": 1.480, + "args": { + "External id": 505117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569507.852, "dur": 0.991, + "args": { + "External id": 505118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569513.069, "dur": 3.804, + "args": { + "External id": 505119,"Record function id": 0, "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569514.206, "dur": 2.229, + "args": { + "External id": 505120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569514.667, "dur": 1.319, + "args": { + "External id": 505121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569515.201, "dur": 0.637, + "args": { + "External id": 505122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569520.390, "dur": 3.494, + "args": { + "External id": 505123,"Record function id": 0, "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569521.590, "dur": 1.886, + "args": { + "External id": 505124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569522.066, "dur": 1.022, + "args": { + "External id": 505125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569522.346, "dur": 0.635, + "args": { + "External id": 505126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569527.018, "dur": 3.600, + "args": { + "External id": 505127,"Record function id": 0, "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569528.350, "dur": 1.830, + "args": { + "External id": 505128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569528.774, "dur": 1.028, + "args": { + "External id": 505129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569529.062, "dur": 0.665, + "args": { + "External id": 505130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569533.708, "dur": 3.658, + "args": { + "External id": 505131,"Record function id": 0, "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142569534.922, "dur": 2.058, + "args": { + "External id": 505132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569535.496, "dur": 1.074, + "args": { + "External id": 505133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142569535.880, "dur": 0.582, + "args": { + "External id": 505134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142569541.067, "dur": 65501.584, + "args": { + "External id": 505135,"Record function id": 0, "Sequence number": 5516047, "Fwd thread id": 1, "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142569542.327, "dur": 65490.026, + "args": { + "External id": 505136,"Sequence number": 5516047, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2863 + } + }, + { + "ph": "f", "id": 187, "pid": 2338711, "tid": 2379440, "ts": 6274142569542.327, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274142569575.507, "dur": 76.703, + "args": { + "External id": 505137,"Record function id": 0, "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274142569663.083, "dur": 75.860, + "args": { + "External id": 505138,"Record function id": 0, "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274142569745.105, "dur": 65278.345, + "args": { + "External id": 505139,"Record function id": 0, "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142569847.062, "dur": 8.088, + "args": { + "External id": 505140,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142569866.720, "dur": 7.443, + "args": { + "External id": 505141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142569890.853, "dur": 64106.469, + "args": { + "External id": 505142,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142569906.397, "dur": 64076.134, + "args": { + "External id": 505143,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142570041.897, "dur": 63.934, + "args": { + "External id": 505144,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142570131.155, "dur": 63797.541, + "args": { + "External id": 505145,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142570134.684, "dur": 63792.739, + "args": { + "External id": 505146,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142570140.196, "dur": 12.974, + "args": { + "External id": 505147,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142570155.565, "dur": 63765.303, + "args": { + "External id": 505148,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142634147.565, "dur": 14.938, + "args": { + "External id": 505149,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142634152.237, "dur": 9.586, + "args": { + "External id": 505150,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142634197.561, "dur": 471.341, + "args": { + "External id": 505151,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142634229.239, "dur": 432.802, + "args": { + "External id": 505152,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2879, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142634242.686, "dur": 410.589, + "args": { + "External id": 505153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142634699.228, "dur": 3.364, + "args": { + "External id": 505154,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2881, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634782.752, "dur": 8.037, + "args": { + "External id": 505155,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634842.637, "dur": 1.451, + "args": { + "External id": 505156,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634861.910, "dur": 3.443, + "args": { + "External id": 505157,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634878.941, "dur": 1.109, + "args": { + "External id": 505158,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634892.682, "dur": 1.000, + "args": { + "External id": 505159,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634910.901, "dur": 0.849, + "args": { + "External id": 505160,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634924.162, "dur": 3.494, + "args": { + "External id": 505161,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634940.818, "dur": 2.632, + "args": { + "External id": 505162,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142634954.765, "dur": 0.900, + "args": { + "External id": 505163,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142635110.252, "dur": 3321.788, + "args": { + "External id": 505164,"Record function id": 0, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274142635136.870, "dur": 1236.459, + "args": { + "External id": 505165,"Record function id": 0, "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274142635155.152, "dur": 390.360, + "args": { + "External id": 505166,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635257.563, "dur": 5.586, + "args": { + "External id": 505167,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635267.313, "dur": 1.160, + "args": { + "External id": 505168,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635270.460, "dur": 2.950, + "args": { + "External id": 505169,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635275.152, "dur": 0.981, + "args": { + "External id": 505170,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635277.814, "dur": 0.868, + "args": { + "External id": 505171,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635280.341, "dur": 0.811, + "args": { + "External id": 505172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635282.827, "dur": 2.196, + "args": { + "External id": 505173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635286.588, "dur": 0.621, + "args": { + "External id": 505174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635288.671, "dur": 0.949, + "args": { + "External id": 505175,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142635291.260, "dur": 0.808, + "args": { + "External id": 505176,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142635325.941, "dur": 178.418, + "args": { + "External id": 505177,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142635344.460, "dur": 154.643, + "args": { + "External id": 505178,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142635366.547, "dur": 18.781, + "args": { + "External id": 505179,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142635389.739, "dur": 76.177, + "args": { + "External id": 505180,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142635392.723, "dur": 72.754, + "args": { + "External id": 505181,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635397.434, "dur": 6.205, + "args": { + "External id": 505182,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142635405.872, "dur": 58.856, + "args": { + "External id": 505183,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338711, "tid": 2379440, + "ts": 6274142635681.388, "dur": 682.815, + "args": { + "External id": 505184,"Record function id": 0, "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274142635704.391, "dur": 646.273, + "args": { + "External id": 505185,"Record function id": 0, "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142635771.953, "dur": 7.244, + "args": { + "External id": 505186,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142635796.196, "dur": 36.896, + "args": { + "External id": 505187,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635803.286, "dur": 1.945, + "args": { + "External id": 505188,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635807.866, "dur": 0.478, + "args": { + "External id": 505189,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635810.251, "dur": 0.431, + "args": { + "External id": 505190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635812.570, "dur": 0.587, + "args": { + "External id": 505191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635815.109, "dur": 0.475, + "args": { + "External id": 505192,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635817.954, "dur": 2.854, + "args": { + "External id": 505193,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635822.466, "dur": 0.391, + "args": { + "External id": 505194,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635824.571, "dur": 0.363, + "args": { + "External id": 505195,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635826.833, "dur": 0.513, + "args": { + "External id": 505196,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142635843.597, "dur": 52.354, + "args": { + "External id": 505197,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142635931.074, "dur": 119.392, + "args": { + "External id": 505198,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142635942.273, "dur": 3.779, + "args": { + "External id": 505199,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142635951.574, "dur": 11.250, + "args": { + "External id": 505200,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142635956.339, "dur": 6.045, + "args": { + "External id": 505201,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635960.687, "dur": 0.411, + "args": { + "External id": 505202,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142635970.051, "dur": 28.939, + "args": { + "External id": 505203,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635972.973, "dur": 0.571, + "args": { + "External id": 505204,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635975.529, "dur": 0.524, + "args": { + "External id": 505205,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635977.809, "dur": 2.526, + "args": { + "External id": 505206,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635982.472, "dur": 0.637, + "args": { + "External id": 505207,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635985.191, "dur": 0.617, + "args": { + "External id": 505208,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635987.729, "dur": 0.523, + "args": { + "External id": 505209,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635990.166, "dur": 0.412, + "args": { + "External id": 505210,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635992.415, "dur": 0.500, + "args": { + "External id": 505211,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142635994.806, "dur": 0.507, + "args": { + "External id": 505212,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142636010.465, "dur": 32.352, + "args": { + "External id": 505213,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142636141.713, "dur": 133.406, + "args": { + "External id": 505214,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142636172.180, "dur": 98.835, + "args": { + "External id": 505215,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2942, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142636183.428, "dur": 83.163, + "args": { + "External id": 505216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142636290.378, "dur": 1.740, + "args": { + "External id": 505217,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2944, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142636382.286, "dur": 2027.886, + "args": { + "External id": 505218,"Sequence number": 5516046, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2945 + } + }, + { + "ph": "f", "id": 188, "pid": 2338711, "tid": 2379440, "ts": 6274142636382.286, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142636501.731, "dur": 147.007, + "args": { + "External id": 505219,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142636700.370, "dur": 43.743, + "args": { + "External id": 505220,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142636765.574, "dur": 65.744, + "args": { + "External id": 505221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142636845.620, "dur": 35.794, + "args": { + "External id": 505222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142636889.680, "dur": 43.800, + "args": { + "External id": 505223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142636941.043, "dur": 30.963, + "args": { + "External id": 505224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142636980.147, "dur": 31.988, + "args": { + "External id": 505225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142637039.127, "dur": 65.314, + "args": { + "External id": 505226,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142637145.622, "dur": 35.937, + "args": { + "External id": 505227,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142637205.637, "dur": 23.026, + "args": { + "External id": 505228,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142637244.258, "dur": 16.818, + "args": { + "External id": 505229,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142637271.129, "dur": 46.536, + "args": { + "External id": 505230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142637321.850, "dur": 35.892, + "args": { + "External id": 505231,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142637393.711, "dur": 323.780, + "args": { + "External id": 505232,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142637481.267, "dur": 7.239, + "args": { + "External id": 505233,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142637490.971, "dur": 2.791, + "args": { + "External id": 505234,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142637502.044, "dur": 2.251, + "args": { + "External id": 505235,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142637505.779, "dur": 2.586, + "args": { + "External id": 505236,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142637557.328, "dur": 5.697, + "args": { + "External id": 505237,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142637559.718, "dur": 3.113, + "args": { + "External id": 505238,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142637565.610, "dur": 61.306, + "args": { + "External id": 505239,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142637571.675, "dur": 3.603, + "args": { + "External id": 505240,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142637630.849, "dur": 3.361, + "args": { + "External id": 505241,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142637632.720, "dur": 1.266, + "args": { + "External id": 505242,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142637636.495, "dur": 22.891, + "args": { + "External id": 505243,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142637640.241, "dur": 0.994, + "args": { + "External id": 505244,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142637758.105, "dur": 32.216, + "args": { + "External id": 505245,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142637807.879, "dur": 18.828, + "args": { + "External id": 505246,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142637840.523, "dur": 57.674, + "args": { + "External id": 505247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142637921.408, "dur": 58.780, + "args": { + "External id": 505248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142637994.144, "dur": 22.279, + "args": { + "External id": 505249,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142638023.166, "dur": 71.356, + "args": { + "External id": 505250,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142638110.960, "dur": 36.818, + "args": { + "External id": 505251,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142638155.906, "dur": 32.211, + "args": { + "External id": 505252,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142638211.369, "dur": 27.717, + "args": { + "External id": 505253,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142638255.649, "dur": 24.282, + "args": { + "External id": 505254,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142638295.890, "dur": 17.855, + "args": { + "External id": 505255,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142638331.101, "dur": 14.798, + "args": { + "External id": 505256,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142638359.158, "dur": 16.853, + "args": { + "External id": 505257,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638456.779, "dur": 16.986, + "args": { + "External id": 505258,"Record function id": 0, "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638460.403, "dur": 12.373, + "args": { + "External id": 505259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638465.112, "dur": 6.716, + "args": { + "External id": 505260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638467.239, "dur": 4.470, + "args": { + "External id": 505261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638478.179, "dur": 5.733, + "args": { + "External id": 505262,"Record function id": 0, "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638479.721, "dur": 3.761, + "args": { + "External id": 505263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638480.569, "dur": 2.459, + "args": { + "External id": 505264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638481.518, "dur": 1.412, + "args": { + "External id": 505265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638487.636, "dur": 6.792, + "args": { + "External id": 505266,"Record function id": 0, "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638489.059, "dur": 4.983, + "args": { + "External id": 505267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638489.713, "dur": 3.929, + "args": { + "External id": 505268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638490.092, "dur": 3.472, + "args": { + "External id": 505269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638498.092, "dur": 4.228, + "args": { + "External id": 505270,"Record function id": 0, "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638499.296, "dur": 2.630, + "args": { + "External id": 505271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638499.963, "dur": 1.563, + "args": { + "External id": 505272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638500.449, "dur": 1.013, + "args": { + "External id": 505273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638505.580, "dur": 3.966, + "args": { + "External id": 505274,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638506.804, "dur": 2.350, + "args": { + "External id": 505275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638507.406, "dur": 1.359, + "args": { + "External id": 505276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638507.727, "dur": 0.973, + "args": { + "External id": 505277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638512.968, "dur": 4.277, + "args": { + "External id": 505278,"Record function id": 0, "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638514.050, "dur": 2.775, + "args": { + "External id": 505279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638514.622, "dur": 1.807, + "args": { + "External id": 505280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638515.215, "dur": 1.103, + "args": { + "External id": 505281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638520.666, "dur": 3.826, + "args": { + "External id": 505282,"Record function id": 0, "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638522.148, "dur": 1.952, + "args": { + "External id": 505283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638522.632, "dur": 1.077, + "args": { + "External id": 505284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638522.930, "dur": 0.684, + "args": { + "External id": 505285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638528.105, "dur": 4.140, + "args": { + "External id": 505286,"Record function id": 0, "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638529.502, "dur": 2.340, + "args": { + "External id": 505287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638530.172, "dur": 1.239, + "args": { + "External id": 505288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638530.560, "dur": 0.773, + "args": { + "External id": 505289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638535.555, "dur": 3.732, + "args": { + "External id": 505290,"Record function id": 0, "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142638536.880, "dur": 1.998, + "args": { + "External id": 505291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638537.347, "dur": 1.147, + "args": { + "External id": 505292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142638537.780, "dur": 0.615, + "args": { + "External id": 505293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142638543.299, "dur": 66256.088, + "args": { + "External id": 505294,"Record function id": 0, "Sequence number": 5516045, "Fwd thread id": 1, "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142638544.438, "dur": 66244.681, + "args": { + "External id": 505295,"Sequence number": 5516045, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3022 + } + }, + { + "ph": "f", "id": 189, "pid": 2338711, "tid": 2379440, "ts": 6274142638544.438, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274142638577.462, "dur": 68.883, + "args": { + "External id": 505296,"Record function id": 0, "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274142638658.992, "dur": 83.650, + "args": { + "External id": 505297,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274142638749.161, "dur": 66031.382, + "args": { + "External id": 505298,"Record function id": 0, "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142638853.682, "dur": 8.772, + "args": { + "External id": 505299,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142638874.297, "dur": 8.173, + "args": { + "External id": 505300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142638899.381, "dur": 64891.332, + "args": { + "External id": 505301,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142638915.040, "dur": 64860.434, + "args": { + "External id": 505302,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142639025.463, "dur": 19.762, + "args": { + "External id": 505303,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142639109.306, "dur": 64613.890, + "args": { + "External id": 505304,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142639113.187, "dur": 64608.541, + "args": { + "External id": 505305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142639119.573, "dur": 13.130, + "args": { + "External id": 505306,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142639135.387, "dur": 64580.039, + "args": { + "External id": 505307,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142703909.752, "dur": 14.845, + "args": { + "External id": 505308,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142703914.257, "dur": 9.843, + "args": { + "External id": 505309,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142703960.851, "dur": 442.764, + "args": { + "External id": 505310,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142703991.732, "dur": 405.893, + "args": { + "External id": 505311,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3038, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142704004.721, "dur": 386.270, + "args": { + "External id": 505312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142704428.325, "dur": 2.591, + "args": { + "External id": 505313,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3040, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704498.394, "dur": 7.714, + "args": { + "External id": 505314,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704557.925, "dur": 1.541, + "args": { + "External id": 505315,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704577.275, "dur": 3.264, + "args": { + "External id": 505316,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704622.331, "dur": 1.983, + "args": { + "External id": 505317,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704643.182, "dur": 1.080, + "args": { + "External id": 505318,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704660.698, "dur": 1.279, + "args": { + "External id": 505319,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704678.266, "dur": 3.570, + "args": { + "External id": 505320,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704694.987, "dur": 2.412, + "args": { + "External id": 505321,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142704709.124, "dur": 1.063, + "args": { + "External id": 505322,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142704817.477, "dur": 3347.276, + "args": { + "External id": 505323,"Record function id": 0, "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274142704839.020, "dur": 1302.160, + "args": { + "External id": 505324,"Record function id": 0, "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274142704857.017, "dur": 433.931, + "args": { + "External id": 505325,"Record function id": 0, "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704956.090, "dur": 5.117, + "args": { + "External id": 505326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704965.224, "dur": 1.034, + "args": { + "External id": 505327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704968.081, "dur": 3.575, + "args": { + "External id": 505328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704973.407, "dur": 0.945, + "args": { + "External id": 505329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704975.739, "dur": 0.899, + "args": { + "External id": 505330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704978.116, "dur": 1.034, + "args": { + "External id": 505331,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704980.827, "dur": 2.154, + "args": { + "External id": 505332,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704984.245, "dur": 0.884, + "args": { + "External id": 505333,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704986.338, "dur": 1.071, + "args": { + "External id": 505334,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142704989.019, "dur": 4.325, + "args": { + "External id": 505335,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142705027.405, "dur": 224.785, + "args": { + "External id": 505336,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142705045.120, "dur": 200.603, + "args": { + "External id": 505337,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142705107.789, "dur": 22.756, + "args": { + "External id": 505338,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142705135.290, "dur": 77.950, + "args": { + "External id": 505339,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142705138.246, "dur": 74.660, + "args": { + "External id": 505340,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705143.126, "dur": 6.381, + "args": { + "External id": 505341,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142705151.599, "dur": 60.677, + "args": { + "External id": 505342,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338711, "tid": 2379440, + "ts": 6274142705383.072, "dur": 749.357, + "args": { + "External id": 505343,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274142705403.209, "dur": 713.209, + "args": { + "External id": 505344,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142705494.637, "dur": 16.482, + "args": { + "External id": 505345,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142705527.515, "dur": 33.504, + "args": { + "External id": 505346,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705533.324, "dur": 2.002, + "args": { + "External id": 505347,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705537.923, "dur": 0.710, + "args": { + "External id": 505348,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705540.318, "dur": 0.623, + "args": { + "External id": 505349,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705542.649, "dur": 0.534, + "args": { + "External id": 505350,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705545.082, "dur": 0.748, + "args": { + "External id": 505351,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705547.192, "dur": 2.274, + "args": { + "External id": 505352,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705551.270, "dur": 0.457, + "args": { + "External id": 505353,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705553.296, "dur": 0.538, + "args": { + "External id": 505354,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705555.663, "dur": 0.343, + "args": { + "External id": 505355,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142705571.812, "dur": 78.499, + "args": { + "External id": 505356,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142705694.335, "dur": 129.537, + "args": { + "External id": 505357,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142705707.617, "dur": 5.148, + "args": { + "External id": 505358,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142705718.671, "dur": 12.337, + "args": { + "External id": 505359,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142705723.664, "dur": 6.888, + "args": { + "External id": 505360,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705727.952, "dur": 0.815, + "args": { + "External id": 505361,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142705738.752, "dur": 26.770, + "args": { + "External id": 505362,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705741.825, "dur": 0.479, + "args": { + "External id": 505363,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705744.105, "dur": 0.525, + "args": { + "External id": 505364,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705746.754, "dur": 2.695, + "args": { + "External id": 505365,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705750.818, "dur": 0.372, + "args": { + "External id": 505366,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705752.779, "dur": 0.466, + "args": { + "External id": 505367,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705754.505, "dur": 0.426, + "args": { + "External id": 505368,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705756.605, "dur": 0.409, + "args": { + "External id": 505369,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705758.750, "dur": 0.392, + "args": { + "External id": 505370,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142705760.859, "dur": 0.491, + "args": { + "External id": 505371,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142705782.460, "dur": 33.055, + "args": { + "External id": 505372,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142705883.462, "dur": 118.702, + "args": { + "External id": 505373,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142705907.182, "dur": 91.060, + "args": { + "External id": 505374,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3101, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142705918.162, "dur": 75.580, + "args": { + "External id": 505375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142706017.300, "dur": 1.933, + "args": { + "External id": 505376,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3103, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142706150.654, "dur": 1990.841, + "args": { + "External id": 505377,"Sequence number": 5516044, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3104 + } + }, + { + "ph": "f", "id": 190, "pid": 2338711, "tid": 2379440, "ts": 6274142706150.654, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142706271.687, "dur": 117.846, + "args": { + "External id": 505378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142706448.204, "dur": 44.537, + "args": { + "External id": 505379,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142706511.931, "dur": 54.780, + "args": { + "External id": 505380,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142706579.057, "dur": 66.024, + "args": { + "External id": 505381,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142706660.177, "dur": 51.021, + "args": { + "External id": 505382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142706722.232, "dur": 34.160, + "args": { + "External id": 505383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142706765.218, "dur": 30.638, + "args": { + "External id": 505384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142706827.751, "dur": 28.822, + "args": { + "External id": 505385,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142706874.676, "dur": 36.526, + "args": { + "External id": 505386,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142706931.799, "dur": 22.336, + "args": { + "External id": 505387,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142706967.752, "dur": 21.602, + "args": { + "External id": 505388,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142706998.650, "dur": 40.280, + "args": { + "External id": 505389,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142707042.698, "dur": 76.245, + "args": { + "External id": 505390,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142707160.056, "dur": 270.747, + "args": { + "External id": 505391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142707247.035, "dur": 7.159, + "args": { + "External id": 505392,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142707256.417, "dur": 2.840, + "args": { + "External id": 505393,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142707260.735, "dur": 2.336, + "args": { + "External id": 505394,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142707264.521, "dur": 2.202, + "args": { + "External id": 505395,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142707315.340, "dur": 5.863, + "args": { + "External id": 505396,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142707317.643, "dur": 3.363, + "args": { + "External id": 505397,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142707323.780, "dur": 37.536, + "args": { + "External id": 505398,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142707329.781, "dur": 3.866, + "args": { + "External id": 505399,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142707363.448, "dur": 1.963, + "args": { + "External id": 505400,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142707364.572, "dur": 0.759, + "args": { + "External id": 505401,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142707366.982, "dur": 18.719, + "args": { + "External id": 505402,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142707368.890, "dur": 0.650, + "args": { + "External id": 505403,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142707466.609, "dur": 27.762, + "args": { + "External id": 505404,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142707510.598, "dur": 31.574, + "args": { + "External id": 505405,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142707560.015, "dur": 86.658, + "args": { + "External id": 505406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142707659.569, "dur": 56.539, + "args": { + "External id": 505407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142707731.221, "dur": 27.456, + "args": { + "External id": 505408,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142707765.413, "dur": 33.082, + "args": { + "External id": 505409,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142707807.505, "dur": 29.907, + "args": { + "External id": 505410,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142707844.707, "dur": 32.627, + "args": { + "External id": 505411,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142707899.822, "dur": 28.020, + "args": { + "External id": 505412,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142707944.104, "dur": 26.034, + "args": { + "External id": 505413,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142707984.049, "dur": 20.315, + "args": { + "External id": 505414,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142708021.197, "dur": 15.887, + "args": { + "External id": 505415,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142708049.068, "dur": 55.314, + "args": { + "External id": 505416,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708190.637, "dur": 17.337, + "args": { + "External id": 505417,"Record function id": 0, "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708194.239, "dur": 12.748, + "args": { + "External id": 505418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708198.952, "dur": 7.072, + "args": { + "External id": 505419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708201.044, "dur": 4.862, + "args": { + "External id": 505420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708211.726, "dur": 28.995, + "args": { + "External id": 505421,"Record function id": 0, "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708212.991, "dur": 27.120, + "args": { + "External id": 505422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708213.640, "dur": 25.915, + "args": { + "External id": 505423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708238.245, "dur": 1.174, + "args": { + "External id": 505424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708244.102, "dur": 6.948, + "args": { + "External id": 505425,"Record function id": 0, "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708245.411, "dur": 5.107, + "args": { + "External id": 505426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708246.014, "dur": 4.097, + "args": { + "External id": 505427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708246.499, "dur": 3.541, + "args": { + "External id": 505428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708254.293, "dur": 3.985, + "args": { + "External id": 505429,"Record function id": 0, "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708255.791, "dur": 2.028, + "args": { + "External id": 505430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708256.323, "dur": 1.085, + "args": { + "External id": 505431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708256.602, "dur": 0.732, + "args": { + "External id": 505432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708261.548, "dur": 3.401, + "args": { + "External id": 505433,"Record function id": 0, "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708262.521, "dur": 2.017, + "args": { + "External id": 505434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708263.106, "dur": 0.999, + "args": { + "External id": 505435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708263.429, "dur": 0.567, + "args": { + "External id": 505436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708268.219, "dur": 4.246, + "args": { + "External id": 505437,"Record function id": 0, "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708269.498, "dur": 2.525, + "args": { + "External id": 505438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708270.009, "dur": 1.560, + "args": { + "External id": 505439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708270.769, "dur": 0.714, + "args": { + "External id": 505440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708275.832, "dur": 5.557, + "args": { + "External id": 505441,"Record function id": 0, "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708276.866, "dur": 4.100, + "args": { + "External id": 505442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708277.447, "dur": 3.090, + "args": { + "External id": 505443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708279.733, "dur": 0.739, + "args": { + "External id": 505444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708284.826, "dur": 3.253, + "args": { + "External id": 505445,"Record function id": 0, "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708285.805, "dur": 1.860, + "args": { + "External id": 505446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708286.282, "dur": 0.961, + "args": { + "External id": 505447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708286.601, "dur": 0.556, + "args": { + "External id": 505448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708291.223, "dur": 3.366, + "args": { + "External id": 505449,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142708292.332, "dur": 1.842, + "args": { + "External id": 505450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708292.807, "dur": 0.947, + "args": { + "External id": 505451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142708293.128, "dur": 0.543, + "args": { + "External id": 505452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142708298.494, "dur": 64461.975, + "args": { + "External id": 505453,"Record function id": 0, "Sequence number": 5516043, "Fwd thread id": 1, "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142708299.776, "dur": 64450.138, + "args": { + "External id": 505454,"Sequence number": 5516043, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3181 + } + }, + { + "ph": "f", "id": 191, "pid": 2338711, "tid": 2379440, "ts": 6274142708299.776, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274142708337.059, "dur": 42.671, + "args": { + "External id": 505455,"Record function id": 0, "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274142708388.049, "dur": 66.551, + "args": { + "External id": 505456,"Record function id": 0, "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274142708460.706, "dur": 64280.089, + "args": { + "External id": 505457,"Record function id": 0, "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142708557.971, "dur": 8.223, + "args": { + "External id": 505458,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142708576.534, "dur": 6.992, + "args": { + "External id": 505459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142708624.214, "dur": 63157.401, + "args": { + "External id": 505460,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142708643.060, "dur": 63124.282, + "args": { + "External id": 505461,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142708745.775, "dur": 23.005, + "args": { + "External id": 505462,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142708790.007, "dur": 62917.240, + "args": { + "External id": 505463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142708793.498, "dur": 62912.562, + "args": { + "External id": 505464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142708798.387, "dur": 12.018, + "args": { + "External id": 505465,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142708812.674, "dur": 62886.831, + "args": { + "External id": 505466,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142771904.927, "dur": 14.838, + "args": { + "External id": 505467,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142771909.321, "dur": 10.079, + "args": { + "External id": 505468,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142771952.814, "dur": 426.818, + "args": { + "External id": 505469,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142771985.218, "dur": 388.528, + "args": { + "External id": 505470,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3197, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142771997.850, "dur": 369.849, + "args": { + "External id": 505471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142772402.579, "dur": 2.360, + "args": { + "External id": 505472,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3199, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772471.226, "dur": 7.421, + "args": { + "External id": 505473,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772528.557, "dur": 1.421, + "args": { + "External id": 505474,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772546.876, "dur": 3.820, + "args": { + "External id": 505475,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772563.569, "dur": 1.164, + "args": { + "External id": 505476,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772576.794, "dur": 0.953, + "args": { + "External id": 505477,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772613.494, "dur": 2.204, + "args": { + "External id": 505478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772634.041, "dur": 3.880, + "args": { + "External id": 505479,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772654.223, "dur": 2.747, + "args": { + "External id": 505480,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142772670.049, "dur": 0.749, + "args": { + "External id": 505481,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142772777.485, "dur": 3115.272, + "args": { + "External id": 505482,"Record function id": 0, "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274142772799.355, "dur": 1175.286, + "args": { + "External id": 505483,"Record function id": 0, "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274142772817.321, "dur": 406.306, + "args": { + "External id": 505484,"Record function id": 0, "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772910.532, "dur": 4.698, + "args": { + "External id": 505485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772919.125, "dur": 1.255, + "args": { + "External id": 505486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772922.219, "dur": 2.985, + "args": { + "External id": 505487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772927.075, "dur": 0.868, + "args": { + "External id": 505488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772929.381, "dur": 0.702, + "args": { + "External id": 505489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772931.631, "dur": 0.794, + "args": { + "External id": 505490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772933.932, "dur": 2.704, + "args": { + "External id": 505491,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772938.061, "dur": 0.977, + "args": { + "External id": 505492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772940.341, "dur": 0.742, + "args": { + "External id": 505493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142772942.589, "dur": 0.814, + "args": { + "External id": 505494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142772961.837, "dur": 222.069, + "args": { + "External id": 505495,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142772991.516, "dur": 186.154, + "args": { + "External id": 505496,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142773010.987, "dur": 17.371, + "args": { + "External id": 505497,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142773032.720, "dur": 108.491, + "args": { + "External id": 505498,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142773035.480, "dur": 105.272, + "args": { + "External id": 505499,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773039.931, "dur": 5.855, + "args": { + "External id": 505500,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142773047.794, "dur": 91.777, + "args": { + "External id": 505501,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338711, "tid": 2379440, + "ts": 6274142773313.430, "dur": 652.486, + "args": { + "External id": 505502,"Record function id": 0, "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274142773332.367, "dur": 619.351, + "args": { + "External id": 505503,"Record function id": 0, "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142773398.783, "dur": 6.298, + "args": { + "External id": 505504,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142773420.993, "dur": 35.902, + "args": { + "External id": 505505,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773427.001, "dur": 2.286, + "args": { + "External id": 505506,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773431.690, "dur": 0.691, + "args": { + "External id": 505507,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773433.964, "dur": 0.491, + "args": { + "External id": 505508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773436.217, "dur": 0.581, + "args": { + "External id": 505509,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773438.512, "dur": 0.404, + "args": { + "External id": 505510,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773440.610, "dur": 2.224, + "args": { + "External id": 505511,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773444.687, "dur": 0.438, + "args": { + "External id": 505512,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773447.035, "dur": 0.361, + "args": { + "External id": 505513,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773448.804, "dur": 0.393, + "args": { + "External id": 505514,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142773467.206, "dur": 46.886, + "args": { + "External id": 505515,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142773546.688, "dur": 159.146, + "args": { + "External id": 505516,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142773557.390, "dur": 3.431, + "args": { + "External id": 505517,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142773566.421, "dur": 13.929, + "args": { + "External id": 505518,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142773574.734, "dur": 5.183, + "args": { + "External id": 505519,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773578.355, "dur": 0.428, + "args": { + "External id": 505520,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142773607.647, "dur": 38.305, + "args": { + "External id": 505521,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773611.753, "dur": 1.023, + "args": { + "External id": 505522,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773615.411, "dur": 0.686, + "args": { + "External id": 505523,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773619.213, "dur": 2.968, + "args": { + "External id": 505524,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773624.763, "dur": 0.566, + "args": { + "External id": 505525,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773627.737, "dur": 0.592, + "args": { + "External id": 505526,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773630.562, "dur": 0.548, + "args": { + "External id": 505527,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773633.878, "dur": 0.721, + "args": { + "External id": 505528,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773637.213, "dur": 0.651, + "args": { + "External id": 505529,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142773640.648, "dur": 0.591, + "args": { + "External id": 505530,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142773659.243, "dur": 38.167, + "args": { + "External id": 505531,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142773757.478, "dur": 119.469, + "args": { + "External id": 505532,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142773781.217, "dur": 91.758, + "args": { + "External id": 505533,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3260, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142773792.019, "dur": 76.579, + "args": { + "External id": 505534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142773892.164, "dur": 1.629, + "args": { + "External id": 505535,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3262, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142773981.979, "dur": 1889.488, + "args": { + "External id": 505536,"Sequence number": 5516042, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3263 + } + }, + { + "ph": "f", "id": 192, "pid": 2338711, "tid": 2379440, "ts": 6274142773981.979, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774140.784, "dur": 113.077, + "args": { + "External id": 505537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142774294.077, "dur": 38.945, + "args": { + "External id": 505538,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774351.664, "dur": 51.724, + "args": { + "External id": 505539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774415.646, "dur": 34.081, + "args": { + "External id": 505540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774457.369, "dur": 35.646, + "args": { + "External id": 505541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774501.348, "dur": 31.009, + "args": { + "External id": 505542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774539.554, "dur": 30.841, + "args": { + "External id": 505543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142774620.018, "dur": 33.275, + "args": { + "External id": 505544,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142774682.211, "dur": 35.833, + "args": { + "External id": 505545,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142774740.929, "dur": 20.537, + "args": { + "External id": 505546,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142774774.283, "dur": 14.757, + "args": { + "External id": 505547,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774798.040, "dur": 46.843, + "args": { + "External id": 505548,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142774848.701, "dur": 35.186, + "args": { + "External id": 505549,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142774918.339, "dur": 307.590, + "args": { + "External id": 505550,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142775000.219, "dur": 7.225, + "args": { + "External id": 505551,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142775009.605, "dur": 2.391, + "args": { + "External id": 505552,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142775013.431, "dur": 1.863, + "args": { + "External id": 505553,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142775016.411, "dur": 1.934, + "args": { + "External id": 505554,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142775104.422, "dur": 6.205, + "args": { + "External id": 505555,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142775106.713, "dur": 3.350, + "args": { + "External id": 505556,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142775112.596, "dur": 35.830, + "args": { + "External id": 505557,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142775118.543, "dur": 4.416, + "args": { + "External id": 505558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142775150.151, "dur": 5.578, + "args": { + "External id": 505559,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142775151.154, "dur": 4.489, + "args": { + "External id": 505560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142775157.079, "dur": 15.397, + "args": { + "External id": 505561,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142775159.019, "dur": 0.676, + "args": { + "External id": 505562,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142775263.030, "dur": 27.702, + "args": { + "External id": 505563,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142775307.617, "dur": 17.897, + "args": { + "External id": 505564,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142775335.196, "dur": 48.073, + "args": { + "External id": 505565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142775390.314, "dur": 40.287, + "args": { + "External id": 505566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142775444.891, "dur": 22.760, + "args": { + "External id": 505567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142775473.988, "dur": 33.242, + "args": { + "External id": 505568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142775515.510, "dur": 33.505, + "args": { + "External id": 505569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142775556.227, "dur": 54.001, + "args": { + "External id": 505570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142775638.007, "dur": 29.532, + "args": { + "External id": 505571,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142775683.317, "dur": 43.010, + "args": { + "External id": 505572,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142775749.365, "dur": 22.465, + "args": { + "External id": 505573,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142775790.461, "dur": 14.450, + "args": { + "External id": 505574,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142775817.753, "dur": 17.767, + "args": { + "External id": 505575,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775919.921, "dur": 16.550, + "args": { + "External id": 505576,"Record function id": 0, "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775923.693, "dur": 11.902, + "args": { + "External id": 505577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775928.421, "dur": 6.218, + "args": { + "External id": 505578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775930.048, "dur": 4.480, + "args": { + "External id": 505579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775940.361, "dur": 4.883, + "args": { + "External id": 505580,"Record function id": 0, "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775941.775, "dur": 3.061, + "args": { + "External id": 505581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775942.438, "dur": 1.932, + "args": { + "External id": 505582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775943.250, "dur": 1.023, + "args": { + "External id": 505583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775948.749, "dur": 6.641, + "args": { + "External id": 505584,"Record function id": 0, "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775949.801, "dur": 5.164, + "args": { + "External id": 505585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775950.354, "dur": 4.113, + "args": { + "External id": 505586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775950.755, "dur": 3.639, + "args": { + "External id": 505587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775958.873, "dur": 4.333, + "args": { + "External id": 505588,"Record function id": 0, "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775960.214, "dur": 2.576, + "args": { + "External id": 505589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775960.937, "dur": 1.415, + "args": { + "External id": 505590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775961.421, "dur": 0.866, + "args": { + "External id": 505591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775966.468, "dur": 3.613, + "args": { + "External id": 505592,"Record function id": 0, "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775967.737, "dur": 1.951, + "args": { + "External id": 505593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775968.248, "dur": 1.013, + "args": { + "External id": 505594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775968.568, "dur": 0.607, + "args": { + "External id": 505595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775973.349, "dur": 4.018, + "args": { + "External id": 505596,"Record function id": 0, "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775974.421, "dur": 2.516, + "args": { + "External id": 505597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775975.142, "dur": 1.326, + "args": { + "External id": 505598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775975.715, "dur": 0.659, + "args": { + "External id": 505599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775980.492, "dur": 3.828, + "args": { + "External id": 505600,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775981.656, "dur": 2.251, + "args": { + "External id": 505601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775982.228, "dur": 1.224, + "args": { + "External id": 505602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775982.548, "dur": 0.794, + "args": { + "External id": 505603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775987.646, "dur": 3.791, + "args": { + "External id": 505604,"Record function id": 0, "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775988.731, "dur": 2.290, + "args": { + "External id": 505605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775989.327, "dur": 1.270, + "args": { + "External id": 505606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775989.755, "dur": 0.765, + "args": { + "External id": 505607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775994.695, "dur": 3.688, + "args": { + "External id": 505608,"Record function id": 0, "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142775995.904, "dur": 2.094, + "args": { + "External id": 505609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775996.341, "dur": 1.181, + "args": { + "External id": 505610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142775996.840, "dur": 0.581, + "args": { + "External id": 505611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142776001.899, "dur": 66488.921, + "args": { + "External id": 505612,"Record function id": 0, "Sequence number": 5516041, "Fwd thread id": 1, "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142776003.228, "dur": 66477.940, + "args": { + "External id": 505613,"Sequence number": 5516041, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3340 + } + }, + { + "ph": "f", "id": 193, "pid": 2338711, "tid": 2379440, "ts": 6274142776003.228, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274142776035.715, "dur": 78.363, + "args": { + "External id": 505614,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274142776124.182, "dur": 64.278, + "args": { + "External id": 505615,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274142776194.727, "dur": 66278.304, + "args": { + "External id": 505616,"Record function id": 0, "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142776289.522, "dur": 8.547, + "args": { + "External id": 505617,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142776309.467, "dur": 7.101, + "args": { + "External id": 505618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142776332.436, "dur": 65151.106, + "args": { + "External id": 505619,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142776346.524, "dur": 65123.023, + "args": { + "External id": 505620,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142776482.951, "dur": 19.539, + "args": { + "External id": 505621,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142776523.012, "dur": 64892.789, + "args": { + "External id": 505622,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142776526.193, "dur": 64888.432, + "args": { + "External id": 505623,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142776531.235, "dur": 9.439, + "args": { + "External id": 505624,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142776543.206, "dur": 64865.159, + "args": { + "External id": 505625,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142841614.913, "dur": 18.372, + "args": { + "External id": 505626,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142841620.499, "dur": 11.958, + "args": { + "External id": 505627,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142841671.424, "dur": 466.915, + "args": { + "External id": 505628,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142841701.977, "dur": 429.696, + "args": { + "External id": 505629,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3356, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142841714.868, "dur": 409.963, + "args": { + "External id": 505630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142842163.029, "dur": 2.646, + "args": { + "External id": 505631,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3358, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842236.518, "dur": 7.727, + "args": { + "External id": 505632,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842292.933, "dur": 1.356, + "args": { + "External id": 505633,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842311.374, "dur": 3.132, + "args": { + "External id": 505634,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842328.775, "dur": 0.934, + "args": { + "External id": 505635,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842341.314, "dur": 0.875, + "args": { + "External id": 505636,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842354.469, "dur": 1.013, + "args": { + "External id": 505637,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842367.719, "dur": 3.547, + "args": { + "External id": 505638,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842390.722, "dur": 2.426, + "args": { + "External id": 505639,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842404.155, "dur": 0.727, + "args": { + "External id": 505640,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142842505.880, "dur": 3229.759, + "args": { + "External id": 505641,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274142842527.197, "dur": 1229.399, + "args": { + "External id": 505642,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274142842544.585, "dur": 404.022, + "args": { + "External id": 505643,"Record function id": 0, "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842669.064, "dur": 6.638, + "args": { + "External id": 505644,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842680.621, "dur": 1.250, + "args": { + "External id": 505645,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842684.685, "dur": 3.401, + "args": { + "External id": 505646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842690.554, "dur": 1.483, + "args": { + "External id": 505647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842694.307, "dur": 1.352, + "args": { + "External id": 505648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842698.478, "dur": 1.115, + "args": { + "External id": 505649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842702.082, "dur": 2.439, + "args": { + "External id": 505650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842706.941, "dur": 1.004, + "args": { + "External id": 505651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842710.169, "dur": 1.030, + "args": { + "External id": 505652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142842713.508, "dur": 1.222, + "args": { + "External id": 505653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142842735.748, "dur": 171.376, + "args": { + "External id": 505654,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142842753.496, "dur": 148.412, + "args": { + "External id": 505655,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142842774.963, "dur": 17.828, + "args": { + "External id": 505656,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142842797.093, "dur": 74.284, + "args": { + "External id": 505657,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142842800.078, "dur": 70.965, + "args": { + "External id": 505658,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142842804.930, "dur": 5.473, + "args": { + "External id": 505659,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142842812.283, "dur": 57.875, + "args": { + "External id": 505660,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338711, "tid": 2379440, + "ts": 6274142843040.779, "dur": 707.221, + "args": { + "External id": 505661,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274142843103.560, "dur": 630.340, + "args": { + "External id": 505662,"Record function id": 0, "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142843171.715, "dur": 10.915, + "args": { + "External id": 505663,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142843198.673, "dur": 41.339, + "args": { + "External id": 505664,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843209.192, "dur": 1.921, + "args": { + "External id": 505665,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843213.628, "dur": 0.690, + "args": { + "External id": 505666,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843216.263, "dur": 0.396, + "args": { + "External id": 505667,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843218.415, "dur": 0.387, + "args": { + "External id": 505668,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843220.262, "dur": 0.448, + "args": { + "External id": 505669,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843222.517, "dur": 2.522, + "args": { + "External id": 505670,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843226.667, "dur": 0.309, + "args": { + "External id": 505671,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843228.830, "dur": 0.404, + "args": { + "External id": 505672,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843230.943, "dur": 0.462, + "args": { + "External id": 505673,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142843249.826, "dur": 53.170, + "args": { + "External id": 505674,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142843336.020, "dur": 115.389, + "args": { + "External id": 505675,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142843346.727, "dur": 3.574, + "args": { + "External id": 505676,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142843355.671, "dur": 10.995, + "args": { + "External id": 505677,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142843360.474, "dur": 5.767, + "args": { + "External id": 505678,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843364.428, "dur": 0.596, + "args": { + "External id": 505679,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142843373.210, "dur": 25.730, + "args": { + "External id": 505680,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843375.598, "dur": 0.505, + "args": { + "External id": 505681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843377.740, "dur": 0.579, + "args": { + "External id": 505682,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843380.271, "dur": 2.391, + "args": { + "External id": 505683,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843384.427, "dur": 0.547, + "args": { + "External id": 505684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843386.897, "dur": 0.355, + "args": { + "External id": 505685,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843388.928, "dur": 0.423, + "args": { + "External id": 505686,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843391.039, "dur": 0.399, + "args": { + "External id": 505687,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843392.951, "dur": 0.482, + "args": { + "External id": 505688,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142843394.921, "dur": 0.368, + "args": { + "External id": 505689,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142843411.424, "dur": 32.371, + "args": { + "External id": 505690,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142843498.239, "dur": 141.887, + "args": { + "External id": 505691,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142843520.500, "dur": 114.875, + "args": { + "External id": 505692,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3419, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142843530.829, "dur": 95.114, + "args": { + "External id": 505693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142843659.890, "dur": 2.328, + "args": { + "External id": 505694,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3421, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142843764.690, "dur": 1949.611, + "args": { + "External id": 505695,"Sequence number": 5516040, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3422 + } + }, + { + "ph": "f", "id": 194, "pid": 2338711, "tid": 2379440, "ts": 6274142843764.690, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142843879.632, "dur": 122.963, + "args": { + "External id": 505696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142844041.153, "dur": 86.760, + "args": { + "External id": 505697,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142844150.727, "dur": 61.530, + "args": { + "External id": 505698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142844225.412, "dur": 34.186, + "args": { + "External id": 505699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142844266.928, "dur": 33.647, + "args": { + "External id": 505700,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142844307.875, "dur": 29.811, + "args": { + "External id": 505701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142844344.480, "dur": 30.676, + "args": { + "External id": 505702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142844401.554, "dur": 25.923, + "args": { + "External id": 505703,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142844445.193, "dur": 27.994, + "args": { + "External id": 505704,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142844492.797, "dur": 20.582, + "args": { + "External id": 505705,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142844525.558, "dur": 16.010, + "args": { + "External id": 505706,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142844550.193, "dur": 59.888, + "args": { + "External id": 505707,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142844617.756, "dur": 50.617, + "args": { + "External id": 505708,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142844710.552, "dur": 276.017, + "args": { + "External id": 505709,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142844800.751, "dur": 6.206, + "args": { + "External id": 505710,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142844809.142, "dur": 2.449, + "args": { + "External id": 505711,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142844812.980, "dur": 2.022, + "args": { + "External id": 505712,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142844816.432, "dur": 2.180, + "args": { + "External id": 505713,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142844874.725, "dur": 5.536, + "args": { + "External id": 505714,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142844877.066, "dur": 3.002, + "args": { + "External id": 505715,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142844882.509, "dur": 38.076, + "args": { + "External id": 505716,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142844888.012, "dur": 3.666, + "args": { + "External id": 505717,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142844922.655, "dur": 2.132, + "args": { + "External id": 505718,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142844923.956, "dur": 0.758, + "args": { + "External id": 505719,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142844926.293, "dur": 15.063, + "args": { + "External id": 505720,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142844928.287, "dur": 0.618, + "args": { + "External id": 505721,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142845022.974, "dur": 31.313, + "args": { + "External id": 505722,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142845116.535, "dur": 20.286, + "args": { + "External id": 505723,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142845148.257, "dur": 71.662, + "args": { + "External id": 505724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142845232.961, "dur": 50.535, + "args": { + "External id": 505725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142845296.859, "dur": 23.799, + "args": { + "External id": 505726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142845330.255, "dur": 32.629, + "args": { + "External id": 505727,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142845370.525, "dur": 31.739, + "args": { + "External id": 505728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142845409.457, "dur": 34.838, + "args": { + "External id": 505729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142845464.725, "dur": 26.181, + "args": { + "External id": 505730,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142845509.358, "dur": 25.617, + "args": { + "External id": 505731,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142845548.247, "dur": 19.003, + "args": { + "External id": 505732,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142845583.755, "dur": 41.103, + "args": { + "External id": 505733,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142845649.792, "dur": 25.161, + "args": { + "External id": 505734,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845759.469, "dur": 16.158, + "args": { + "External id": 505735,"Record function id": 0, "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845762.926, "dur": 11.605, + "args": { + "External id": 505736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845767.488, "dur": 6.273, + "args": { + "External id": 505737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845769.221, "dur": 4.416, + "args": { + "External id": 505738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845779.760, "dur": 5.237, + "args": { + "External id": 505739,"Record function id": 0, "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845781.132, "dur": 3.435, + "args": { + "External id": 505740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845782.028, "dur": 2.119, + "args": { + "External id": 505741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845782.776, "dur": 1.272, + "args": { + "External id": 505742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845788.401, "dur": 6.439, + "args": { + "External id": 505743,"Record function id": 0, "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845789.788, "dur": 4.652, + "args": { + "External id": 505744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845790.386, "dur": 3.641, + "args": { + "External id": 505745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845790.744, "dur": 3.202, + "args": { + "External id": 505746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845798.263, "dur": 3.646, + "args": { + "External id": 505747,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845799.450, "dur": 2.060, + "args": { + "External id": 505748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845799.981, "dur": 1.132, + "args": { + "External id": 505749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845800.308, "dur": 0.733, + "args": { + "External id": 505750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845804.996, "dur": 3.563, + "args": { + "External id": 505751,"Record function id": 0, "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845806.187, "dur": 1.992, + "args": { + "External id": 505752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845806.638, "dur": 1.161, + "args": { + "External id": 505753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845807.072, "dur": 0.651, + "args": { + "External id": 505754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845811.603, "dur": 3.763, + "args": { + "External id": 505755,"Record function id": 0, "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845812.857, "dur": 2.113, + "args": { + "External id": 505756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845813.314, "dur": 1.273, + "args": { + "External id": 505757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845813.720, "dur": 0.750, + "args": { + "External id": 505758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845818.450, "dur": 3.461, + "args": { + "External id": 505759,"Record function id": 0, "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845819.574, "dur": 1.912, + "args": { + "External id": 505760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845820.146, "dur": 0.949, + "args": { + "External id": 505761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845820.405, "dur": 0.562, + "args": { + "External id": 505762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845824.915, "dur": 3.072, + "args": { + "External id": 505763,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845825.842, "dur": 1.759, + "args": { + "External id": 505764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845826.270, "dur": 0.923, + "args": { + "External id": 505765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845826.554, "dur": 0.552, + "args": { + "External id": 505766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845830.966, "dur": 3.425, + "args": { + "External id": 505767,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142845831.957, "dur": 2.055, + "args": { + "External id": 505768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845832.394, "dur": 1.227, + "args": { + "External id": 505769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142845832.868, "dur": 0.649, + "args": { + "External id": 505770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142845838.209, "dur": 62087.962, + "args": { + "External id": 505771,"Record function id": 0, "Sequence number": 5516039, "Fwd thread id": 1, "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142845839.411, "dur": 62076.806, + "args": { + "External id": 505772,"Sequence number": 5516039, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3499 + } + }, + { + "ph": "f", "id": 195, "pid": 2338711, "tid": 2379440, "ts": 6274142845839.411, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274142845872.534, "dur": 43.201, + "args": { + "External id": 505773,"Record function id": 0, "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274142845923.773, "dur": 65.765, + "args": { + "External id": 505774,"Record function id": 0, "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274142845995.855, "dur": 61911.902, + "args": { + "External id": 505775,"Record function id": 0, "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142846132.226, "dur": 9.293, + "args": { + "External id": 505776,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142846153.283, "dur": 7.306, + "args": { + "External id": 505777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142846178.703, "dur": 60782.075, + "args": { + "External id": 505778,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142846194.095, "dur": 60752.497, + "args": { + "External id": 505779,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142846323.833, "dur": 19.314, + "args": { + "External id": 505780,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142846363.756, "dur": 60527.274, + "args": { + "External id": 505781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142846366.891, "dur": 60522.708, + "args": { + "External id": 505782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142846372.008, "dur": 10.535, + "args": { + "External id": 505783,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142846384.851, "dur": 60498.385, + "args": { + "External id": 505784,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142907110.552, "dur": 13.759, + "args": { + "External id": 505785,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142907114.847, "dur": 8.809, + "args": { + "External id": 505786,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142907156.712, "dur": 400.853, + "args": { + "External id": 505787,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142907185.402, "dur": 366.784, + "args": { + "External id": 505788,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3515, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142907198.257, "dur": 348.202, + "args": { + "External id": 505789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142907578.356, "dur": 2.306, + "args": { + "External id": 505790,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3517, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907680.097, "dur": 7.826, + "args": { + "External id": 505791,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907742.643, "dur": 1.644, + "args": { + "External id": 505792,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907763.662, "dur": 3.088, + "args": { + "External id": 505793,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907778.748, "dur": 1.138, + "args": { + "External id": 505794,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907790.029, "dur": 0.988, + "args": { + "External id": 505795,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907801.466, "dur": 1.044, + "args": { + "External id": 505796,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907814.399, "dur": 2.880, + "args": { + "External id": 505797,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907828.080, "dur": 2.057, + "args": { + "External id": 505798,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142907840.490, "dur": 0.914, + "args": { + "External id": 505799,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142907943.471, "dur": 3316.652, + "args": { + "External id": 505800,"Record function id": 0, "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274142907963.904, "dur": 1251.146, + "args": { + "External id": 505801,"Record function id": 0, "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274142907980.470, "dur": 416.581, + "args": { + "External id": 505802,"Record function id": 0, "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908120.204, "dur": 5.537, + "args": { + "External id": 505803,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908129.726, "dur": 1.040, + "args": { + "External id": 505804,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908132.353, "dur": 3.615, + "args": { + "External id": 505805,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908137.432, "dur": 0.873, + "args": { + "External id": 505806,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908139.900, "dur": 0.927, + "args": { + "External id": 505807,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908142.397, "dur": 0.920, + "args": { + "External id": 505808,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908144.908, "dur": 2.181, + "args": { + "External id": 505809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908148.315, "dur": 0.996, + "args": { + "External id": 505810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908150.527, "dur": 0.774, + "args": { + "External id": 505811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142908152.573, "dur": 0.762, + "args": { + "External id": 505812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142908172.642, "dur": 188.371, + "args": { + "External id": 505813,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142908189.878, "dur": 165.908, + "args": { + "External id": 505814,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142908220.534, "dur": 16.834, + "args": { + "External id": 505815,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142908241.467, "dur": 78.919, + "args": { + "External id": 505816,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142908244.379, "dur": 75.700, + "args": { + "External id": 505817,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908248.664, "dur": 5.764, + "args": { + "External id": 505818,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142908256.266, "dur": 62.874, + "args": { + "External id": 505819,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338711, "tid": 2379440, + "ts": 6274142908487.524, "dur": 718.680, + "args": { + "External id": 505820,"Record function id": 0, "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274142908505.742, "dur": 686.041, + "args": { + "External id": 505821,"Record function id": 0, "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142908564.005, "dur": 5.909, + "args": { + "External id": 505822,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142908606.527, "dur": 51.091, + "args": { + "External id": 505823,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908613.138, "dur": 2.133, + "args": { + "External id": 505824,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908618.928, "dur": 0.683, + "args": { + "External id": 505825,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908622.381, "dur": 0.614, + "args": { + "External id": 505826,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908624.978, "dur": 0.585, + "args": { + "External id": 505827,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908628.109, "dur": 0.725, + "args": { + "External id": 505828,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908631.010, "dur": 2.832, + "args": { + "External id": 505829,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908636.562, "dur": 0.656, + "args": { + "External id": 505830,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908647.883, "dur": 0.642, + "args": { + "External id": 505831,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908650.699, "dur": 0.678, + "args": { + "External id": 505832,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142908677.846, "dur": 57.479, + "args": { + "External id": 505833,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142908773.977, "dur": 121.237, + "args": { + "External id": 505834,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142908788.634, "dur": 5.260, + "args": { + "External id": 505835,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142908799.816, "dur": 10.767, + "args": { + "External id": 505836,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142908804.438, "dur": 5.734, + "args": { + "External id": 505837,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908808.203, "dur": 0.637, + "args": { + "External id": 505838,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142908818.140, "dur": 28.456, + "args": { + "External id": 505839,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908820.494, "dur": 0.471, + "args": { + "External id": 505840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908822.529, "dur": 0.524, + "args": { + "External id": 505841,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908824.552, "dur": 2.599, + "args": { + "External id": 505842,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908828.643, "dur": 0.432, + "args": { + "External id": 505843,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908830.604, "dur": 0.598, + "args": { + "External id": 505844,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908832.982, "dur": 0.466, + "args": { + "External id": 505845,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908834.934, "dur": 0.388, + "args": { + "External id": 505846,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908836.869, "dur": 4.065, + "args": { + "External id": 505847,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142908842.188, "dur": 0.355, + "args": { + "External id": 505848,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142908856.950, "dur": 30.293, + "args": { + "External id": 505849,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142908950.987, "dur": 159.222, + "args": { + "External id": 505850,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142908973.379, "dur": 132.409, + "args": { + "External id": 505851,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3578, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142908983.817, "dur": 115.777, + "args": { + "External id": 505852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142909129.000, "dur": 2.515, + "args": { + "External id": 505853,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3580, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142909222.642, "dur": 2014.759, + "args": { + "External id": 505854,"Sequence number": 5516038, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3581 + } + }, + { + "ph": "f", "id": 196, "pid": 2338711, "tid": 2379440, "ts": 6274142909222.642, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142909336.739, "dur": 118.265, + "args": { + "External id": 505855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142909494.684, "dur": 40.433, + "args": { + "External id": 505856,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142909584.347, "dur": 104.506, + "args": { + "External id": 505857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142909708.330, "dur": 36.122, + "args": { + "External id": 505858,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142909752.357, "dur": 36.373, + "args": { + "External id": 505859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142909796.670, "dur": 29.817, + "args": { + "External id": 505860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142909833.467, "dur": 31.783, + "args": { + "External id": 505861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142909898.046, "dur": 29.017, + "args": { + "External id": 505862,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142909944.980, "dur": 35.680, + "args": { + "External id": 505863,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142910000.475, "dur": 24.456, + "args": { + "External id": 505864,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142910037.299, "dur": 62.390, + "args": { + "External id": 505865,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910112.886, "dur": 44.144, + "args": { + "External id": 505866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910161.376, "dur": 35.800, + "args": { + "External id": 505867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142910230.683, "dur": 261.088, + "args": { + "External id": 505868,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142910314.373, "dur": 6.758, + "args": { + "External id": 505869,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142910323.422, "dur": 2.395, + "args": { + "External id": 505870,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142910327.102, "dur": 2.055, + "args": { + "External id": 505871,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142910330.237, "dur": 1.874, + "args": { + "External id": 505872,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142910382.626, "dur": 5.707, + "args": { + "External id": 505873,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142910384.851, "dur": 3.295, + "args": { + "External id": 505874,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142910390.605, "dur": 37.361, + "args": { + "External id": 505875,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142910395.985, "dur": 3.953, + "args": { + "External id": 505876,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142910429.838, "dur": 1.967, + "args": { + "External id": 505877,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142910431.089, "dur": 0.645, + "args": { + "External id": 505878,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142910433.194, "dur": 14.851, + "args": { + "External id": 505879,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142910435.022, "dur": 0.701, + "args": { + "External id": 505880,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142910525.507, "dur": 30.513, + "args": { + "External id": 505881,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142910572.312, "dur": 47.396, + "args": { + "External id": 505882,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910634.513, "dur": 64.961, + "args": { + "External id": 505883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910710.344, "dur": 67.098, + "args": { + "External id": 505884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910798.628, "dur": 29.408, + "args": { + "External id": 505885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910834.911, "dur": 46.111, + "args": { + "External id": 505886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910889.278, "dur": 33.537, + "args": { + "External id": 505887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142910929.678, "dur": 31.983, + "args": { + "External id": 505888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142910985.123, "dur": 28.966, + "args": { + "External id": 505889,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142911030.661, "dur": 71.489, + "args": { + "External id": 505890,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142911119.923, "dur": 20.188, + "args": { + "External id": 505891,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142911158.305, "dur": 15.389, + "args": { + "External id": 505892,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142911187.467, "dur": 17.254, + "args": { + "External id": 505893,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911283.347, "dur": 17.033, + "args": { + "External id": 505894,"Record function id": 0, "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911287.214, "dur": 12.165, + "args": { + "External id": 505895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911291.710, "dur": 6.627, + "args": { + "External id": 505896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911293.699, "dur": 4.513, + "args": { + "External id": 505897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911304.232, "dur": 5.339, + "args": { + "External id": 505898,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911305.710, "dur": 3.404, + "args": { + "External id": 505899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911306.467, "dur": 2.162, + "args": { + "External id": 505900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911307.320, "dur": 1.207, + "args": { + "External id": 505901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911312.823, "dur": 6.451, + "args": { + "External id": 505902,"Record function id": 0, "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911314.243, "dur": 4.600, + "args": { + "External id": 505903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911314.755, "dur": 3.630, + "args": { + "External id": 505904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911315.070, "dur": 3.218, + "args": { + "External id": 505905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911322.467, "dur": 4.027, + "args": { + "External id": 505906,"Record function id": 0, "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911323.912, "dur": 2.165, + "args": { + "External id": 505907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911324.429, "dur": 1.230, + "args": { + "External id": 505908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911324.735, "dur": 0.850, + "args": { + "External id": 505909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911329.819, "dur": 3.700, + "args": { + "External id": 505910,"Record function id": 0, "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911331.099, "dur": 2.024, + "args": { + "External id": 505911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911331.777, "dur": 0.945, + "args": { + "External id": 505912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911332.067, "dur": 0.578, + "args": { + "External id": 505913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911336.720, "dur": 3.660, + "args": { + "External id": 505914,"Record function id": 0, "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911337.825, "dur": 2.143, + "args": { + "External id": 505915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911338.312, "dur": 1.238, + "args": { + "External id": 505916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911338.902, "dur": 0.539, + "args": { + "External id": 505917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911343.454, "dur": 3.737, + "args": { + "External id": 505918,"Record function id": 0, "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911344.585, "dur": 2.196, + "args": { + "External id": 505919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911345.028, "dur": 1.355, + "args": { + "External id": 505920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911345.514, "dur": 0.778, + "args": { + "External id": 505921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911350.346, "dur": 3.800, + "args": { + "External id": 505922,"Record function id": 0, "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911351.580, "dur": 2.124, + "args": { + "External id": 505923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911352.238, "dur": 1.045, + "args": { + "External id": 505924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911352.492, "dur": 0.714, + "args": { + "External id": 505925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911357.218, "dur": 4.261, + "args": { + "External id": 505926,"Record function id": 0, "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142911358.770, "dur": 2.291, + "args": { + "External id": 505927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911359.257, "dur": 1.409, + "args": { + "External id": 505928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142911359.850, "dur": 0.672, + "args": { + "External id": 505929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142911365.261, "dur": 62181.450, + "args": { + "External id": 505930,"Record function id": 0, "Sequence number": 5516037, "Fwd thread id": 1, "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142911366.666, "dur": 62170.715, + "args": { + "External id": 505931,"Sequence number": 5516037, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3658 + } + }, + { + "ph": "f", "id": 197, "pid": 2338711, "tid": 2379440, "ts": 6274142911366.666, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274142911399.927, "dur": 42.363, + "args": { + "External id": 505932,"Record function id": 0, "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274142911450.459, "dur": 64.947, + "args": { + "External id": 505933,"Record function id": 0, "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274142911520.874, "dur": 62007.316, + "args": { + "External id": 505934,"Record function id": 0, "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142911642.368, "dur": 9.906, + "args": { + "External id": 505935,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142911665.332, "dur": 8.441, + "args": { + "External id": 505936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142911693.122, "dur": 60914.756, + "args": { + "External id": 505937,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142911711.709, "dur": 60867.212, + "args": { + "External id": 505938,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142911837.148, "dur": 19.487, + "args": { + "External id": 505939,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142911877.183, "dur": 60646.726, + "args": { + "External id": 505940,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142911880.271, "dur": 60642.287, + "args": { + "External id": 505941,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142911885.199, "dur": 10.573, + "args": { + "External id": 505942,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142911898.124, "dur": 60617.539, + "args": { + "External id": 505943,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142972737.590, "dur": 15.180, + "args": { + "External id": 505944,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142972741.923, "dur": 10.293, + "args": { + "External id": 505945,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142972784.645, "dur": 417.750, + "args": { + "External id": 505946,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142972813.945, "dur": 382.810, + "args": { + "External id": 505947,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142972826.512, "dur": 363.682, + "args": { + "External id": 505948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142973226.306, "dur": 2.530, + "args": { + "External id": 505949,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973297.515, "dur": 7.487, + "args": { + "External id": 505950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973353.588, "dur": 1.518, + "args": { + "External id": 505951,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973370.443, "dur": 2.980, + "args": { + "External id": 505952,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973385.323, "dur": 1.173, + "args": { + "External id": 505953,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973398.688, "dur": 1.597, + "args": { + "External id": 505954,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973411.678, "dur": 1.018, + "args": { + "External id": 505955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973424.850, "dur": 2.960, + "args": { + "External id": 505956,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973439.182, "dur": 2.411, + "args": { + "External id": 505957,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973452.608, "dur": 0.854, + "args": { + "External id": 505958,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142973561.657, "dur": 3237.048, + "args": { + "External id": 505959,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274142973583.048, "dur": 1269.413, + "args": { + "External id": 505960,"Record function id": 0, "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274142973625.785, "dur": 388.886, + "args": { + "External id": 505961,"Record function id": 0, "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973737.970, "dur": 5.814, + "args": { + "External id": 505962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973747.332, "dur": 0.987, + "args": { + "External id": 505963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973749.922, "dur": 3.836, + "args": { + "External id": 505964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973755.164, "dur": 0.928, + "args": { + "External id": 505965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973757.655, "dur": 0.828, + "args": { + "External id": 505966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973760.126, "dur": 0.785, + "args": { + "External id": 505967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973762.195, "dur": 2.044, + "args": { + "External id": 505968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973765.453, "dur": 0.898, + "args": { + "External id": 505969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973767.450, "dur": 3.923, + "args": { + "External id": 505970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142973772.691, "dur": 0.753, + "args": { + "External id": 505971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142973791.841, "dur": 186.666, + "args": { + "External id": 505972,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142973827.252, "dur": 145.944, + "args": { + "External id": 505973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142973848.001, "dur": 19.187, + "args": { + "External id": 505974,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142973871.318, "dur": 71.561, + "args": { + "External id": 505975,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142973874.385, "dur": 68.083, + "args": { + "External id": 505976,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142973878.755, "dur": 5.702, + "args": { + "External id": 505977,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142973886.511, "dur": 55.520, + "args": { + "External id": 505978,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338711, "tid": 2379440, + "ts": 6274142974169.798, "dur": 673.855, + "args": { + "External id": 505979,"Record function id": 0, "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274142974191.100, "dur": 637.587, + "args": { + "External id": 505980,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142974258.010, "dur": 6.807, + "args": { + "External id": 505981,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142974284.717, "dur": 33.476, + "args": { + "External id": 505982,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974290.482, "dur": 1.998, + "args": { + "External id": 505983,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974295.061, "dur": 0.733, + "args": { + "External id": 505984,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974297.472, "dur": 0.798, + "args": { + "External id": 505985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974300.044, "dur": 0.737, + "args": { + "External id": 505986,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974302.724, "dur": 0.430, + "args": { + "External id": 505987,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974305.152, "dur": 2.615, + "args": { + "External id": 505988,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974309.283, "dur": 0.398, + "args": { + "External id": 505989,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974311.171, "dur": 0.437, + "args": { + "External id": 505990,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974313.238, "dur": 0.469, + "args": { + "External id": 505991,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142974328.729, "dur": 48.015, + "args": { + "External id": 505992,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274142974415.198, "dur": 114.320, + "args": { + "External id": 505993,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142974426.310, "dur": 4.218, + "args": { + "External id": 505994,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274142974435.687, "dur": 11.235, + "args": { + "External id": 505995,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274142974440.787, "dur": 5.684, + "args": { + "External id": 505996,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974444.350, "dur": 0.775, + "args": { + "External id": 505997,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274142974453.918, "dur": 25.120, + "args": { + "External id": 505998,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974456.311, "dur": 0.462, + "args": { + "External id": 505999,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974458.545, "dur": 0.445, + "args": { + "External id": 506000,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974460.623, "dur": 2.470, + "args": { + "External id": 506001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974464.773, "dur": 0.445, + "args": { + "External id": 506002,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974466.461, "dur": 0.466, + "args": { + "External id": 506003,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974468.649, "dur": 0.286, + "args": { + "External id": 506004,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974470.573, "dur": 0.396, + "args": { + "External id": 506005,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974472.660, "dur": 0.443, + "args": { + "External id": 506006,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142974474.493, "dur": 0.418, + "args": { + "External id": 506007,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142974490.338, "dur": 31.195, + "args": { + "External id": 506008,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274142974575.049, "dur": 168.507, + "args": { + "External id": 506009,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142974621.086, "dur": 117.983, + "args": { + "External id": 506010,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274142974634.727, "dur": 98.972, + "args": { + "External id": 506011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274142974761.055, "dur": 1.930, + "args": { + "External id": 506012,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3739, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142974860.648, "dur": 1916.432, + "args": { + "External id": 506013,"Sequence number": 5516036, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3740 + } + }, + { + "ph": "f", "id": 198, "pid": 2338711, "tid": 2379440, "ts": 6274142974860.648, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142974980.956, "dur": 154.016, + "args": { + "External id": 506014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142975179.025, "dur": 41.703, + "args": { + "External id": 506015,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274142975240.118, "dur": 58.740, + "args": { + "External id": 506016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142975312.339, "dur": 34.342, + "args": { + "External id": 506017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142975354.361, "dur": 35.546, + "args": { + "External id": 506018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142975397.431, "dur": 29.564, + "args": { + "External id": 506019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142975434.614, "dur": 30.459, + "args": { + "External id": 506020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142975493.453, "dur": 22.957, + "args": { + "External id": 506021,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274142975534.038, "dur": 32.487, + "args": { + "External id": 506022,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142975602.237, "dur": 28.550, + "args": { + "External id": 506023,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142975651.114, "dur": 20.403, + "args": { + "External id": 506024,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142975685.581, "dur": 49.381, + "args": { + "External id": 506025,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142975739.419, "dur": 35.237, + "args": { + "External id": 506026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274142975824.798, "dur": 302.545, + "args": { + "External id": 506027,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142975913.370, "dur": 6.466, + "args": { + "External id": 506028,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142975922.183, "dur": 2.493, + "args": { + "External id": 506029,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142975926.273, "dur": 1.883, + "args": { + "External id": 506030,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142975929.443, "dur": 1.841, + "args": { + "External id": 506031,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142975980.828, "dur": 5.451, + "args": { + "External id": 506032,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142975983.030, "dur": 3.054, + "args": { + "External id": 506033,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142975988.637, "dur": 34.475, + "args": { + "External id": 506034,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142975993.862, "dur": 4.324, + "args": { + "External id": 506035,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274142976024.984, "dur": 1.643, + "args": { + "External id": 506036,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142976025.757, "dur": 0.801, + "args": { + "External id": 506037,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274142976027.897, "dur": 15.379, + "args": { + "External id": 506038,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142976029.731, "dur": 0.878, + "args": { + "External id": 506039,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274142976168.332, "dur": 32.280, + "args": { + "External id": 506040,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142976217.485, "dur": 18.001, + "args": { + "External id": 506041,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142976244.835, "dur": 47.994, + "args": { + "External id": 506042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142976300.457, "dur": 41.103, + "args": { + "External id": 506043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142976352.762, "dur": 21.449, + "args": { + "External id": 506044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142976380.807, "dur": 33.408, + "args": { + "External id": 506045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142976422.607, "dur": 29.337, + "args": { + "External id": 506046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274142976459.009, "dur": 31.533, + "args": { + "External id": 506047,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274142976507.999, "dur": 22.739, + "args": { + "External id": 506048,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142976558.996, "dur": 54.324, + "args": { + "External id": 506049,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274142976640.831, "dur": 26.079, + "args": { + "External id": 506050,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274142976691.243, "dur": 18.608, + "args": { + "External id": 506051,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274142976724.864, "dur": 16.962, + "args": { + "External id": 506052,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976826.177, "dur": 16.811, + "args": { + "External id": 506053,"Record function id": 0, "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976830.054, "dur": 11.898, + "args": { + "External id": 506054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976834.909, "dur": 6.021, + "args": { + "External id": 506055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976836.474, "dur": 4.358, + "args": { + "External id": 506056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976846.708, "dur": 5.061, + "args": { + "External id": 506057,"Record function id": 0, "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976848.369, "dur": 2.974, + "args": { + "External id": 506058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976849.164, "dur": 1.661, + "args": { + "External id": 506059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976849.616, "dur": 1.077, + "args": { + "External id": 506060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976855.133, "dur": 6.856, + "args": { + "External id": 506061,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976856.979, "dur": 4.598, + "args": { + "External id": 506062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976857.676, "dur": 3.439, + "args": { + "External id": 506063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976857.989, "dur": 3.033, + "args": { + "External id": 506064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976865.108, "dur": 5.287, + "args": { + "External id": 506065,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976866.526, "dur": 3.443, + "args": { + "External id": 506066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976867.367, "dur": 2.176, + "args": { + "External id": 506067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976868.658, "dur": 0.674, + "args": { + "External id": 506068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976873.530, "dur": 4.881, + "args": { + "External id": 506069,"Record function id": 0, "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976875.062, "dur": 2.915, + "args": { + "External id": 506070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976875.858, "dur": 1.633, + "args": { + "External id": 506071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976876.487, "dur": 0.897, + "args": { + "External id": 506072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976881.513, "dur": 3.760, + "args": { + "External id": 506073,"Record function id": 0, "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976882.839, "dur": 2.046, + "args": { + "External id": 506074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976883.308, "dur": 1.132, + "args": { + "External id": 506075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976883.688, "dur": 0.645, + "args": { + "External id": 506076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976888.425, "dur": 4.964, + "args": { + "External id": 506077,"Record function id": 0, "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976889.535, "dur": 3.366, + "args": { + "External id": 506078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976890.083, "dur": 2.392, + "args": { + "External id": 506079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976891.636, "dur": 0.704, + "args": { + "External id": 506080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976896.606, "dur": 3.571, + "args": { + "External id": 506081,"Record function id": 0, "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976897.738, "dur": 2.033, + "args": { + "External id": 506082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976898.332, "dur": 0.992, + "args": { + "External id": 506083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976898.707, "dur": 0.543, + "args": { + "External id": 506084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976903.258, "dur": 3.944, + "args": { + "External id": 506085,"Record function id": 0, "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274142976904.460, "dur": 2.344, + "args": { + "External id": 506086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976905.046, "dur": 1.370, + "args": { + "External id": 506087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274142976905.442, "dur": 0.681, + "args": { + "External id": 506088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142976911.111, "dur": 63666.166, + "args": { + "External id": 506089,"Record function id": 0, "Sequence number": 5516035, "Fwd thread id": 1, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274142976912.255, "dur": 63655.903, + "args": { + "External id": 506090,"Sequence number": 5516035, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3817 + } + }, + { + "ph": "f", "id": 199, "pid": 2338711, "tid": 2379440, "ts": 6274142976912.255, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274142976944.511, "dur": 41.616, + "args": { + "External id": 506091,"Record function id": 0, "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274142976994.476, "dur": 104.833, + "args": { + "External id": 506092,"Record function id": 0, "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274142977107.743, "dur": 63452.071, + "args": { + "External id": 506093,"Record function id": 0, "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142977205.838, "dur": 8.216, + "args": { + "External id": 506094,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274142977225.330, "dur": 7.404, + "args": { + "External id": 506095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142977248.753, "dur": 62293.873, + "args": { + "External id": 506096,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274142977264.171, "dur": 62264.673, + "args": { + "External id": 506097,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274142977366.247, "dur": 19.114, + "args": { + "External id": 506098,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274142977405.357, "dur": 62066.067, + "args": { + "External id": 506099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274142977408.895, "dur": 62061.202, + "args": { + "External id": 506100,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274142977413.972, "dur": 11.424, + "args": { + "External id": 506101,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274142977427.294, "dur": 62036.103, + "args": { + "External id": 506102,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143039693.771, "dur": 14.084, + "args": { + "External id": 506103,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143039697.629, "dur": 9.605, + "args": { + "External id": 506104,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143039744.707, "dur": 499.938, + "args": { + "External id": 506105,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143039781.413, "dur": 457.003, + "args": { + "External id": 506106,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3833, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143039793.472, "dur": 438.587, + "args": { + "External id": 506107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143040274.079, "dur": 2.810, + "args": { + "External id": 506108,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3835, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040350.285, "dur": 7.730, + "args": { + "External id": 506109,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040403.633, "dur": 1.266, + "args": { + "External id": 506110,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040419.920, "dur": 3.492, + "args": { + "External id": 506111,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040435.414, "dur": 1.103, + "args": { + "External id": 506112,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040446.878, "dur": 0.862, + "args": { + "External id": 506113,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040457.674, "dur": 0.612, + "args": { + "External id": 506114,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040469.834, "dur": 2.982, + "args": { + "External id": 506115,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040482.280, "dur": 2.266, + "args": { + "External id": 506116,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040493.489, "dur": 0.932, + "args": { + "External id": 506117,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143040612.567, "dur": 3161.670, + "args": { + "External id": 506118,"Record function id": 0, "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274143040640.229, "dur": 1180.891, + "args": { + "External id": 506119,"Record function id": 0, "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274143040659.361, "dur": 363.115, + "args": { + "External id": 506120,"Record function id": 0, "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040762.994, "dur": 5.827, + "args": { + "External id": 506121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040772.864, "dur": 1.032, + "args": { + "External id": 506122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040775.833, "dur": 3.124, + "args": { + "External id": 506123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040781.086, "dur": 0.688, + "args": { + "External id": 506124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040783.450, "dur": 0.783, + "args": { + "External id": 506125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040787.855, "dur": 0.862, + "args": { + "External id": 506126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040790.370, "dur": 2.236, + "args": { + "External id": 506127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040793.992, "dur": 0.657, + "args": { + "External id": 506128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040796.010, "dur": 0.754, + "args": { + "External id": 506129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143040799.988, "dur": 0.779, + "args": { + "External id": 506130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143040819.625, "dur": 168.197, + "args": { + "External id": 506131,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143040836.601, "dur": 146.347, + "args": { + "External id": 506132,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143040859.403, "dur": 15.767, + "args": { + "External id": 506133,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143040878.958, "dur": 73.118, + "args": { + "External id": 506134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143040881.964, "dur": 69.753, + "args": { + "External id": 506135,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143040886.352, "dur": 6.783, + "args": { + "External id": 506136,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143040894.950, "dur": 56.090, + "args": { + "External id": 506137,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338711, "tid": 2379440, + "ts": 6274143041156.575, "dur": 656.487, + "args": { + "External id": 506138,"Record function id": 0, "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274143041177.209, "dur": 623.162, + "args": { + "External id": 506139,"Record function id": 0, "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143041237.375, "dur": 6.961, + "args": { + "External id": 506140,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143041261.126, "dur": 34.580, + "args": { + "External id": 506141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041267.061, "dur": 1.967, + "args": { + "External id": 506142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041271.136, "dur": 1.590, + "args": { + "External id": 506143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041274.136, "dur": 0.639, + "args": { + "External id": 506144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041276.363, "dur": 0.550, + "args": { + "External id": 506145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041279.787, "dur": 0.496, + "args": { + "External id": 506146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041281.620, "dur": 2.653, + "args": { + "External id": 506147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041286.304, "dur": 0.335, + "args": { + "External id": 506148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041288.293, "dur": 0.258, + "args": { + "External id": 506149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041290.452, "dur": 0.268, + "args": { + "External id": 506150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143041307.367, "dur": 46.075, + "args": { + "External id": 506151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143041386.895, "dur": 111.503, + "args": { + "External id": 506152,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143041396.888, "dur": 3.806, + "args": { + "External id": 506153,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143041406.003, "dur": 10.521, + "args": { + "External id": 506154,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143041410.705, "dur": 5.383, + "args": { + "External id": 506155,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041414.248, "dur": 0.678, + "args": { + "External id": 506156,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143041423.313, "dur": 28.005, + "args": { + "External id": 506157,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041425.721, "dur": 0.280, + "args": { + "External id": 506158,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041428.079, "dur": 0.524, + "args": { + "External id": 506159,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041434.088, "dur": 2.647, + "args": { + "External id": 506160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041438.613, "dur": 0.508, + "args": { + "External id": 506161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041440.419, "dur": 0.406, + "args": { + "External id": 506162,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041442.115, "dur": 0.348, + "args": { + "External id": 506163,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041444.180, "dur": 0.347, + "args": { + "External id": 506164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041445.865, "dur": 0.261, + "args": { + "External id": 506165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143041447.425, "dur": 0.330, + "args": { + "External id": 506166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143041461.368, "dur": 29.609, + "args": { + "External id": 506167,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143041541.951, "dur": 172.102, + "args": { + "External id": 506168,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143041574.944, "dur": 134.164, + "args": { + "External id": 506169,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3896, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143041603.714, "dur": 100.478, + "args": { + "External id": 506170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143041735.822, "dur": 1.824, + "args": { + "External id": 506171,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3898, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143041828.853, "dur": 1921.953, + "args": { + "External id": 506172,"Sequence number": 5516034, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3899 + } + }, + { + "ph": "f", "id": 200, "pid": 2338711, "tid": 2379440, "ts": 6274143041828.853, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143041946.484, "dur": 155.020, + "args": { + "External id": 506173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143042151.889, "dur": 41.969, + "args": { + "External id": 506174,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143042211.818, "dur": 57.327, + "args": { + "External id": 506175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143042282.088, "dur": 32.060, + "args": { + "External id": 506176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143042321.072, "dur": 32.622, + "args": { + "External id": 506177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143042360.194, "dur": 28.978, + "args": { + "External id": 506178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143042396.448, "dur": 28.959, + "args": { + "External id": 506179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143042451.962, "dur": 23.249, + "args": { + "External id": 506180,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143042493.311, "dur": 29.437, + "args": { + "External id": 506181,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143042545.269, "dur": 19.619, + "args": { + "External id": 506182,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143042581.206, "dur": 40.063, + "args": { + "External id": 506183,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143042635.320, "dur": 56.797, + "args": { + "External id": 506184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143042697.369, "dur": 44.754, + "args": { + "External id": 506185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143042776.549, "dur": 262.459, + "args": { + "External id": 506186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143042866.893, "dur": 6.294, + "args": { + "External id": 506187,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143042875.085, "dur": 2.153, + "args": { + "External id": 506188,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143042878.685, "dur": 1.963, + "args": { + "External id": 506189,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143042882.097, "dur": 1.596, + "args": { + "External id": 506190,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143042932.852, "dur": 6.915, + "args": { + "External id": 506191,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143042935.116, "dur": 4.437, + "args": { + "External id": 506192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143042941.866, "dur": 36.141, + "args": { + "External id": 506193,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143042949.449, "dur": 3.505, + "args": { + "External id": 506194,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143042979.348, "dur": 1.770, + "args": { + "External id": 506195,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143042980.478, "dur": 0.567, + "args": { + "External id": 506196,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143042982.000, "dur": 14.397, + "args": { + "External id": 506197,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143042984.083, "dur": 0.635, + "args": { + "External id": 506198,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143043122.014, "dur": 33.635, + "args": { + "External id": 506199,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143043177.723, "dur": 17.612, + "args": { + "External id": 506200,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143043204.762, "dur": 68.100, + "args": { + "External id": 506201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143043286.317, "dur": 45.684, + "args": { + "External id": 506202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143043344.322, "dur": 21.768, + "args": { + "External id": 506203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143043371.513, "dur": 34.258, + "args": { + "External id": 506204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143043412.854, "dur": 29.346, + "args": { + "External id": 506205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143043448.059, "dur": 30.788, + "args": { + "External id": 506206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143043505.148, "dur": 24.596, + "args": { + "External id": 506207,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143043546.010, "dur": 24.313, + "args": { + "External id": 506208,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143043604.984, "dur": 26.528, + "args": { + "External id": 506209,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143043660.309, "dur": 20.674, + "args": { + "External id": 506210,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143043699.813, "dur": 18.661, + "args": { + "External id": 506211,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043798.698, "dur": 17.506, + "args": { + "External id": 506212,"Record function id": 0, "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043802.430, "dur": 12.733, + "args": { + "External id": 506213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043807.051, "dur": 7.333, + "args": { + "External id": 506214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043808.832, "dur": 5.452, + "args": { + "External id": 506215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043820.382, "dur": 5.066, + "args": { + "External id": 506216,"Record function id": 0, "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043821.985, "dur": 3.022, + "args": { + "External id": 506217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043823.168, "dur": 1.336, + "args": { + "External id": 506218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043823.595, "dur": 0.797, + "args": { + "External id": 506219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043828.514, "dur": 7.047, + "args": { + "External id": 506220,"Record function id": 0, "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043830.000, "dur": 5.166, + "args": { + "External id": 506221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043830.804, "dur": 3.910, + "args": { + "External id": 506222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043831.677, "dur": 2.964, + "args": { + "External id": 506223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043838.840, "dur": 4.279, + "args": { + "External id": 506224,"Record function id": 0, "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043840.226, "dur": 2.482, + "args": { + "External id": 506225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043840.980, "dur": 1.341, + "args": { + "External id": 506226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043841.487, "dur": 0.747, + "args": { + "External id": 506227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043846.123, "dur": 3.802, + "args": { + "External id": 506228,"Record function id": 0, "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043847.430, "dur": 2.104, + "args": { + "External id": 506229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043848.040, "dur": 1.113, + "args": { + "External id": 506230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043848.395, "dur": 0.672, + "args": { + "External id": 506231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043852.897, "dur": 4.041, + "args": { + "External id": 506232,"Record function id": 0, "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043854.172, "dur": 2.364, + "args": { + "External id": 506233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043854.797, "dur": 1.284, + "args": { + "External id": 506234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043855.326, "dur": 0.682, + "args": { + "External id": 506235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043860.198, "dur": 17.359, + "args": { + "External id": 506236,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043861.827, "dur": 15.267, + "args": { + "External id": 506237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043862.287, "dur": 14.275, + "args": { + "External id": 506238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043875.588, "dur": 0.886, + "args": { + "External id": 506239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043880.851, "dur": 4.483, + "args": { + "External id": 506240,"Record function id": 0, "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043882.148, "dur": 2.756, + "args": { + "External id": 506241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043883.107, "dur": 1.334, + "args": { + "External id": 506242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043883.655, "dur": 0.702, + "args": { + "External id": 506243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043888.461, "dur": 3.727, + "args": { + "External id": 506244,"Record function id": 0, "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143043889.648, "dur": 2.132, + "args": { + "External id": 506245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043890.094, "dur": 1.232, + "args": { + "External id": 506246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143043890.513, "dur": 0.734, + "args": { + "External id": 506247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143043896.416, "dur": 61581.767, + "args": { + "External id": 506248,"Record function id": 0, "Sequence number": 5516033, "Fwd thread id": 1, "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143043897.969, "dur": 61570.840, + "args": { + "External id": 506249,"Sequence number": 5516033, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3976 + } + }, + { + "ph": "f", "id": 201, "pid": 2338711, "tid": 2379440, "ts": 6274143043897.969, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274143043931.160, "dur": 42.460, + "args": { + "External id": 506250,"Record function id": 0, "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274143043982.193, "dur": 68.743, + "args": { + "External id": 506251,"Record function id": 0, "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274143044096.054, "dur": 61364.121, + "args": { + "External id": 506252,"Record function id": 0, "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143044198.815, "dur": 9.003, + "args": { + "External id": 506253,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143044218.862, "dur": 7.210, + "args": { + "External id": 506254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143044243.157, "dur": 60328.714, + "args": { + "External id": 506255,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143044258.983, "dur": 60298.023, + "args": { + "External id": 506256,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143044353.205, "dur": 22.361, + "args": { + "External id": 506257,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143044395.523, "dur": 60110.757, + "args": { + "External id": 506258,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143044398.836, "dur": 60106.109, + "args": { + "External id": 506259,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143044404.085, "dur": 12.293, + "args": { + "External id": 506260,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143044418.391, "dur": 60080.353, + "args": { + "External id": 506261,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143104716.996, "dur": 14.443, + "args": { + "External id": 506262,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143104720.889, "dur": 9.902, + "args": { + "External id": 506263,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143104763.419, "dur": 379.361, + "args": { + "External id": 506264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143104799.090, "dur": 337.890, + "args": { + "External id": 506265,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3992, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143104812.814, "dur": 316.551, + "args": { + "External id": 506266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143105172.330, "dur": 2.452, + "args": { + "External id": 506267,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3994, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105246.192, "dur": 10.520, + "args": { + "External id": 506268,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105303.213, "dur": 1.393, + "args": { + "External id": 506269,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105320.486, "dur": 3.387, + "args": { + "External id": 506270,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105335.325, "dur": 0.739, + "args": { + "External id": 506271,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105346.450, "dur": 0.903, + "args": { + "External id": 506272,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105357.217, "dur": 0.941, + "args": { + "External id": 506273,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105368.493, "dur": 3.087, + "args": { + "External id": 506274,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105381.580, "dur": 2.238, + "args": { + "External id": 506275,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105395.386, "dur": 0.682, + "args": { + "External id": 506276,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143105493.433, "dur": 3281.419, + "args": { + "External id": 506277,"Record function id": 0, "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274143105514.184, "dur": 1244.177, + "args": { + "External id": 506278,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274143105531.028, "dur": 405.845, + "args": { + "External id": 506279,"Record function id": 0, "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105648.082, "dur": 6.085, + "args": { + "External id": 506280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105659.301, "dur": 1.075, + "args": { + "External id": 506281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105663.220, "dur": 3.899, + "args": { + "External id": 506282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105670.038, "dur": 0.947, + "args": { + "External id": 506283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105673.340, "dur": 1.057, + "args": { + "External id": 506284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105676.936, "dur": 1.334, + "args": { + "External id": 506285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105680.642, "dur": 2.662, + "args": { + "External id": 506286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105688.050, "dur": 0.945, + "args": { + "External id": 506287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105691.689, "dur": 0.978, + "args": { + "External id": 506288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143105694.536, "dur": 0.608, + "args": { + "External id": 506289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143105716.573, "dur": 182.453, + "args": { + "External id": 506290,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143105733.824, "dur": 159.936, + "args": { + "External id": 506291,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143105761.625, "dur": 18.132, + "args": { + "External id": 506292,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143105783.575, "dur": 80.076, + "args": { + "External id": 506293,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143105786.273, "dur": 77.086, + "args": { + "External id": 506294,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143105791.035, "dur": 10.495, + "args": { + "External id": 506295,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143105803.051, "dur": 59.719, + "args": { + "External id": 506296,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338711, "tid": 2379440, + "ts": 6274143106031.046, "dur": 718.270, + "args": { + "External id": 506297,"Record function id": 0, "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274143106051.184, "dur": 682.970, + "args": { + "External id": 506298,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143106161.522, "dur": 7.669, + "args": { + "External id": 506299,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143106185.871, "dur": 36.927, + "args": { + "External id": 506300,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106191.834, "dur": 3.591, + "args": { + "External id": 506301,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106197.647, "dur": 0.374, + "args": { + "External id": 506302,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106199.474, "dur": 0.453, + "args": { + "External id": 506303,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106203.033, "dur": 0.318, + "args": { + "External id": 506304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106204.826, "dur": 0.366, + "args": { + "External id": 506305,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106207.010, "dur": 2.750, + "args": { + "External id": 506306,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106212.280, "dur": 0.326, + "args": { + "External id": 506307,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106214.326, "dur": 0.377, + "args": { + "External id": 506308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106216.081, "dur": 1.641, + "args": { + "External id": 506309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143106234.437, "dur": 48.115, + "args": { + "External id": 506310,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143106315.781, "dur": 121.741, + "args": { + "External id": 506311,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143106326.072, "dur": 3.828, + "args": { + "External id": 506312,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143106335.260, "dur": 10.960, + "args": { + "External id": 506313,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143106339.895, "dur": 5.899, + "args": { + "External id": 506314,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106343.769, "dur": 0.715, + "args": { + "External id": 506315,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143106353.681, "dur": 26.151, + "args": { + "External id": 506316,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106356.335, "dur": 0.349, + "args": { + "External id": 506317,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106358.631, "dur": 0.457, + "args": { + "External id": 506318,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106360.655, "dur": 3.253, + "args": { + "External id": 506319,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106365.482, "dur": 0.424, + "args": { + "External id": 506320,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106367.245, "dur": 0.399, + "args": { + "External id": 506321,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106370.276, "dur": 0.468, + "args": { + "External id": 506322,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106372.117, "dur": 0.278, + "args": { + "External id": 506323,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106373.803, "dur": 0.387, + "args": { + "External id": 506324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143106375.641, "dur": 0.384, + "args": { + "External id": 506325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143106396.582, "dur": 32.950, + "args": { + "External id": 506326,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143106484.120, "dur": 150.618, + "args": { + "External id": 506327,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143106518.036, "dur": 111.940, + "args": { + "External id": 506328,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4055, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143106528.505, "dur": 95.713, + "args": { + "External id": 506329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143106658.839, "dur": 2.772, + "args": { + "External id": 506330,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4057, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143106766.341, "dur": 1986.142, + "args": { + "External id": 506331,"Sequence number": 5516032, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4058 + } + }, + { + "ph": "f", "id": 202, "pid": 2338711, "tid": 2379440, "ts": 6274143106766.341, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143106891.457, "dur": 116.717, + "args": { + "External id": 506332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143107092.910, "dur": 45.109, + "args": { + "External id": 506333,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143107159.055, "dur": 60.863, + "args": { + "External id": 506334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143107232.399, "dur": 33.143, + "args": { + "External id": 506335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143107271.992, "dur": 34.023, + "args": { + "External id": 506336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143107312.416, "dur": 28.873, + "args": { + "External id": 506337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143107349.256, "dur": 30.253, + "args": { + "External id": 506338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143107407.554, "dur": 26.344, + "args": { + "External id": 506339,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143107453.752, "dur": 34.619, + "args": { + "External id": 506340,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143107516.973, "dur": 20.174, + "args": { + "External id": 506341,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143107550.772, "dur": 17.306, + "args": { + "External id": 506342,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143107580.232, "dur": 71.785, + "args": { + "External id": 506343,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143107659.792, "dur": 46.263, + "args": { + "External id": 506344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143107744.717, "dur": 253.716, + "args": { + "External id": 506345,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143107826.100, "dur": 5.990, + "args": { + "External id": 506346,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143107834.256, "dur": 4.922, + "args": { + "External id": 506347,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143107840.647, "dur": 1.627, + "args": { + "External id": 506348,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143107843.271, "dur": 1.591, + "args": { + "External id": 506349,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143107891.694, "dur": 5.474, + "args": { + "External id": 506350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143107893.932, "dur": 3.026, + "args": { + "External id": 506351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143107899.686, "dur": 33.770, + "args": { + "External id": 506352,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143107906.009, "dur": 3.389, + "args": { + "External id": 506353,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143107935.054, "dur": 1.970, + "args": { + "External id": 506354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143107936.304, "dur": 0.624, + "args": { + "External id": 506355,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143107938.029, "dur": 17.232, + "args": { + "External id": 506356,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143107941.657, "dur": 1.760, + "args": { + "External id": 506357,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143108038.884, "dur": 68.472, + "args": { + "External id": 506358,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143108131.933, "dur": 19.243, + "args": { + "External id": 506359,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143108160.936, "dur": 51.150, + "args": { + "External id": 506360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143108218.803, "dur": 38.683, + "args": { + "External id": 506361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143108268.332, "dur": 41.388, + "args": { + "External id": 506362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143108323.643, "dur": 35.753, + "args": { + "External id": 506363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143108367.906, "dur": 28.334, + "args": { + "External id": 506364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143108402.394, "dur": 32.797, + "args": { + "External id": 506365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143108459.724, "dur": 29.288, + "args": { + "External id": 506366,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143108508.648, "dur": 23.571, + "args": { + "External id": 506367,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143108549.710, "dur": 17.021, + "args": { + "External id": 506368,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143108642.793, "dur": 26.011, + "args": { + "External id": 506369,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143108694.100, "dur": 24.120, + "args": { + "External id": 506370,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108799.092, "dur": 16.420, + "args": { + "External id": 506371,"Record function id": 0, "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108802.911, "dur": 11.623, + "args": { + "External id": 506372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108807.520, "dur": 6.061, + "args": { + "External id": 506373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108809.081, "dur": 4.411, + "args": { + "External id": 506374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108819.293, "dur": 5.190, + "args": { + "External id": 506375,"Record function id": 0, "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108821.155, "dur": 2.863, + "args": { + "External id": 506376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108822.081, "dur": 1.458, + "args": { + "External id": 506377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108822.620, "dur": 0.808, + "args": { + "External id": 506378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108827.628, "dur": 7.475, + "args": { + "External id": 506379,"Record function id": 0, "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108829.304, "dur": 5.365, + "args": { + "External id": 506380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108830.108, "dur": 4.124, + "args": { + "External id": 506381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108830.818, "dur": 3.331, + "args": { + "External id": 506382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108838.237, "dur": 4.167, + "args": { + "External id": 506383,"Record function id": 0, "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108839.552, "dur": 2.444, + "args": { + "External id": 506384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108840.421, "dur": 1.150, + "args": { + "External id": 506385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108840.708, "dur": 0.780, + "args": { + "External id": 506386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108845.478, "dur": 3.513, + "args": { + "External id": 506387,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108846.675, "dur": 1.919, + "args": { + "External id": 506388,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108847.102, "dur": 1.053, + "args": { + "External id": 506389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108847.415, "dur": 0.677, + "args": { + "External id": 506390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108851.928, "dur": 4.158, + "args": { + "External id": 506391,"Record function id": 0, "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108853.571, "dur": 2.107, + "args": { + "External id": 506392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108853.984, "dur": 1.292, + "args": { + "External id": 506393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108854.566, "dur": 0.631, + "args": { + "External id": 506394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108859.361, "dur": 3.691, + "args": { + "External id": 506395,"Record function id": 0, "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108860.705, "dur": 1.956, + "args": { + "External id": 506396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108861.329, "dur": 0.936, + "args": { + "External id": 506397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108861.612, "dur": 0.576, + "args": { + "External id": 506398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108866.103, "dur": 5.332, + "args": { + "External id": 506399,"Record function id": 0, "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108867.125, "dur": 3.914, + "args": { + "External id": 506400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108867.596, "dur": 3.026, + "args": { + "External id": 506401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108869.903, "dur": 0.632, + "args": { + "External id": 506402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108874.560, "dur": 3.955, + "args": { + "External id": 506403,"Record function id": 0, "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143108876.295, "dur": 1.812, + "args": { + "External id": 506404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108876.714, "dur": 0.989, + "args": { + "External id": 506405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143108876.993, "dur": 0.623, + "args": { + "External id": 506406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143108882.692, "dur": 61351.057, + "args": { + "External id": 506407,"Record function id": 0, "Sequence number": 5516031, "Fwd thread id": 1, "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143108884.721, "dur": 61339.089, + "args": { + "External id": 506408,"Sequence number": 5516031, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4135 + } + }, + { + "ph": "f", "id": 203, "pid": 2338711, "tid": 2379440, "ts": 6274143108884.721, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274143108917.534, "dur": 43.042, + "args": { + "External id": 506409,"Record function id": 0, "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274143108969.194, "dur": 66.562, + "args": { + "External id": 506410,"Record function id": 0, "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274143109041.801, "dur": 61173.531, + "args": { + "External id": 506411,"Record function id": 0, "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143109185.060, "dur": 9.540, + "args": { + "External id": 506412,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143109206.018, "dur": 10.743, + "args": { + "External id": 506413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143109232.733, "dur": 59951.055, + "args": { + "External id": 506414,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143109247.545, "dur": 59920.921, + "args": { + "External id": 506415,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143109347.380, "dur": 19.482, + "args": { + "External id": 506416,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143109393.784, "dur": 59721.367, + "args": { + "External id": 506417,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143109399.073, "dur": 59714.777, + "args": { + "External id": 506418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143109404.448, "dur": 11.169, + "args": { + "External id": 506419,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143109417.648, "dur": 59690.038, + "args": { + "External id": 506420,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143169303.055, "dur": 13.320, + "args": { + "External id": 506421,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143169306.900, "dur": 9.013, + "args": { + "External id": 506422,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143169348.903, "dur": 495.403, + "args": { + "External id": 506423,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143169384.041, "dur": 453.642, + "args": { + "External id": 506424,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4151, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143169396.037, "dur": 434.974, + "args": { + "External id": 506425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143169876.776, "dur": 2.715, + "args": { + "External id": 506426,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4153, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143169955.413, "dur": 7.396, + "args": { + "External id": 506427,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170008.989, "dur": 1.340, + "args": { + "External id": 506428,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170025.453, "dur": 4.034, + "args": { + "External id": 506429,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170040.916, "dur": 1.036, + "args": { + "External id": 506430,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170091.193, "dur": 1.706, + "args": { + "External id": 506431,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170108.835, "dur": 0.978, + "args": { + "External id": 506432,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170120.626, "dur": 2.861, + "args": { + "External id": 506433,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170134.203, "dur": 2.398, + "args": { + "External id": 506434,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170146.486, "dur": 0.995, + "args": { + "External id": 506435,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143170250.136, "dur": 3238.561, + "args": { + "External id": 506436,"Record function id": 0, "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274143170274.929, "dur": 1193.648, + "args": { + "External id": 506437,"Record function id": 0, "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274143170291.202, "dur": 401.294, + "args": { + "External id": 506438,"Record function id": 0, "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170381.747, "dur": 4.348, + "args": { + "External id": 506439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170389.615, "dur": 1.153, + "args": { + "External id": 506440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170392.885, "dur": 2.783, + "args": { + "External id": 506441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170397.482, "dur": 0.540, + "args": { + "External id": 506442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170399.765, "dur": 0.967, + "args": { + "External id": 506443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170402.256, "dur": 0.943, + "args": { + "External id": 506444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170404.843, "dur": 1.959, + "args": { + "External id": 506445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170410.458, "dur": 0.950, + "args": { + "External id": 506446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170417.198, "dur": 0.777, + "args": { + "External id": 506447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143170419.712, "dur": 0.711, + "args": { + "External id": 506448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143170443.249, "dur": 203.393, + "args": { + "External id": 506449,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143170460.854, "dur": 178.491, + "args": { + "External id": 506450,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143170482.205, "dur": 20.624, + "args": { + "External id": 506451,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143170509.245, "dur": 73.276, + "args": { + "External id": 506452,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143170512.247, "dur": 69.882, + "args": { + "External id": 506453,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170516.656, "dur": 5.921, + "args": { + "External id": 506454,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143170524.149, "dur": 57.381, + "args": { + "External id": 506455,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338711, "tid": 2379440, + "ts": 6274143170790.561, "dur": 669.953, + "args": { + "External id": 506456,"Record function id": 0, "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274143170810.530, "dur": 636.387, + "args": { + "External id": 506457,"Record function id": 0, "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143170871.722, "dur": 7.073, + "args": { + "External id": 506458,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143170894.177, "dur": 34.324, + "args": { + "External id": 506459,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170899.553, "dur": 2.205, + "args": { + "External id": 506460,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170904.130, "dur": 1.782, + "args": { + "External id": 506461,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170907.325, "dur": 0.631, + "args": { + "External id": 506462,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170909.408, "dur": 0.286, + "args": { + "External id": 506463,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170912.772, "dur": 0.345, + "args": { + "External id": 506464,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170914.688, "dur": 2.293, + "args": { + "External id": 506465,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170918.413, "dur": 0.310, + "args": { + "External id": 506466,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170921.448, "dur": 0.291, + "args": { + "External id": 506467,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143170923.219, "dur": 0.544, + "args": { + "External id": 506468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143170939.293, "dur": 46.721, + "args": { + "External id": 506469,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143171020.984, "dur": 161.676, + "args": { + "External id": 506470,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143171031.983, "dur": 3.584, + "args": { + "External id": 506471,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143171040.821, "dur": 10.899, + "args": { + "External id": 506472,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143171045.630, "dur": 5.683, + "args": { + "External id": 506473,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171049.575, "dur": 0.556, + "args": { + "External id": 506474,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143171098.104, "dur": 29.130, + "args": { + "External id": 506475,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171101.243, "dur": 0.793, + "args": { + "External id": 506476,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171104.927, "dur": 0.436, + "args": { + "External id": 506477,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171106.781, "dur": 2.405, + "args": { + "External id": 506478,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171110.781, "dur": 1.431, + "args": { + "External id": 506479,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171113.668, "dur": 0.346, + "args": { + "External id": 506480,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171115.443, "dur": 0.264, + "args": { + "External id": 506481,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171118.519, "dur": 0.336, + "args": { + "External id": 506482,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171120.267, "dur": 0.303, + "args": { + "External id": 506483,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143171122.115, "dur": 0.462, + "args": { + "External id": 506484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143171138.846, "dur": 35.187, + "args": { + "External id": 506485,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143171234.347, "dur": 134.862, + "args": { + "External id": 506486,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143171264.864, "dur": 100.813, + "args": { + "External id": 506487,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4214, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143171284.069, "dur": 77.222, + "args": { + "External id": 506488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143171390.055, "dur": 1.648, + "args": { + "External id": 506489,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4216, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143171476.451, "dur": 1990.526, + "args": { + "External id": 506490,"Sequence number": 5516030, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4217 + } + }, + { + "ph": "f", "id": 204, "pid": 2338711, "tid": 2379440, "ts": 6274143171476.451, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143171622.678, "dur": 127.758, + "args": { + "External id": 506491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143171800.151, "dur": 41.435, + "args": { + "External id": 506492,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143171858.838, "dur": 53.046, + "args": { + "External id": 506493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143171924.552, "dur": 33.006, + "args": { + "External id": 506494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143171967.167, "dur": 34.031, + "args": { + "External id": 506495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143172008.015, "dur": 32.086, + "args": { + "External id": 506496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143172047.448, "dur": 97.888, + "args": { + "External id": 506497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143172183.562, "dur": 28.289, + "args": { + "External id": 506498,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143172232.210, "dur": 29.822, + "args": { + "External id": 506499,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143172286.612, "dur": 20.654, + "args": { + "External id": 506500,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143172321.920, "dur": 15.776, + "args": { + "External id": 506501,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143172345.651, "dur": 46.079, + "args": { + "External id": 506502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143172395.351, "dur": 35.707, + "args": { + "External id": 506503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143172463.611, "dur": 301.105, + "args": { + "External id": 506504,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143172547.068, "dur": 6.164, + "args": { + "External id": 506505,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143172555.473, "dur": 2.492, + "args": { + "External id": 506506,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143172559.432, "dur": 1.778, + "args": { + "External id": 506507,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143172562.432, "dur": 1.863, + "args": { + "External id": 506508,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143172630.939, "dur": 8.204, + "args": { + "External id": 506509,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143172634.019, "dur": 4.633, + "args": { + "External id": 506510,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143172644.266, "dur": 40.753, + "args": { + "External id": 506511,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143172650.950, "dur": 4.665, + "args": { + "External id": 506512,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143172687.322, "dur": 2.690, + "args": { + "External id": 506513,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143172688.772, "dur": 1.047, + "args": { + "External id": 506514,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143172694.790, "dur": 19.863, + "args": { + "External id": 506515,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143172699.719, "dur": 0.727, + "args": { + "External id": 506516,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143172805.755, "dur": 28.490, + "args": { + "External id": 506517,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143172853.411, "dur": 18.367, + "args": { + "External id": 506518,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143172880.330, "dur": 53.919, + "args": { + "External id": 506519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143172941.081, "dur": 41.763, + "args": { + "External id": 506520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143172993.086, "dur": 22.217, + "args": { + "External id": 506521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143173020.597, "dur": 79.446, + "args": { + "External id": 506522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143173124.690, "dur": 46.265, + "args": { + "External id": 506523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143173177.446, "dur": 34.069, + "args": { + "External id": 506524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143173242.715, "dur": 27.682, + "args": { + "External id": 506525,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143173287.599, "dur": 28.479, + "args": { + "External id": 506526,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143173334.078, "dur": 19.030, + "args": { + "External id": 506527,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143173384.784, "dur": 15.911, + "args": { + "External id": 506528,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143173418.334, "dur": 17.056, + "args": { + "External id": 506529,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173512.451, "dur": 17.954, + "args": { + "External id": 506530,"Record function id": 0, "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173516.292, "dur": 13.099, + "args": { + "External id": 506531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173520.747, "dur": 7.766, + "args": { + "External id": 506532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173522.582, "dur": 5.842, + "args": { + "External id": 506533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173534.513, "dur": 5.024, + "args": { + "External id": 506534,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173536.158, "dur": 2.955, + "args": { + "External id": 506535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173537.026, "dur": 1.620, + "args": { + "External id": 506536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173537.890, "dur": 0.650, + "args": { + "External id": 506537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173542.767, "dur": 6.661, + "args": { + "External id": 506538,"Record function id": 0, "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173544.231, "dur": 4.755, + "args": { + "External id": 506539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173544.717, "dur": 3.844, + "args": { + "External id": 506540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173545.474, "dur": 2.978, + "args": { + "External id": 506541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173552.633, "dur": 4.348, + "args": { + "External id": 506542,"Record function id": 0, "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173554.042, "dur": 2.522, + "args": { + "External id": 506543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173554.808, "dur": 1.355, + "args": { + "External id": 506544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173555.364, "dur": 0.701, + "args": { + "External id": 506545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173560.056, "dur": 4.679, + "args": { + "External id": 506546,"Record function id": 0, "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173561.822, "dur": 2.518, + "args": { + "External id": 506547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173562.667, "dur": 1.239, + "args": { + "External id": 506548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173562.948, "dur": 0.871, + "args": { + "External id": 506549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173567.744, "dur": 4.564, + "args": { + "External id": 506550,"Record function id": 0, "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173569.170, "dur": 2.747, + "args": { + "External id": 506551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173570.201, "dur": 1.280, + "args": { + "External id": 506552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173570.764, "dur": 0.653, + "args": { + "External id": 506553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173575.568, "dur": 3.871, + "args": { + "External id": 506554,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173576.994, "dur": 2.041, + "args": { + "External id": 506555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173577.484, "dur": 1.155, + "args": { + "External id": 506556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173577.764, "dur": 0.797, + "args": { + "External id": 506557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173582.415, "dur": 28.039, + "args": { + "External id": 506558,"Record function id": 0, "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173583.727, "dur": 25.314, + "args": { + "External id": 506559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173584.233, "dur": 23.334, + "args": { + "External id": 506560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173604.794, "dur": 2.357, + "args": { + "External id": 506561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173617.807, "dur": 10.750, + "args": { + "External id": 506562,"Record function id": 0, "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143173620.874, "dur": 6.991, + "args": { + "External id": 506563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173622.547, "dur": 4.607, + "args": { + "External id": 506564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143173625.704, "dur": 1.287, + "args": { + "External id": 506565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143173634.348, "dur": 61256.671, + "args": { + "External id": 506566,"Record function id": 0, "Sequence number": 5516029, "Fwd thread id": 1, "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143173640.211, "dur": 61240.440, + "args": { + "External id": 506567,"Sequence number": 5516029, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4294 + } + }, + { + "ph": "f", "id": 205, "pid": 2338711, "tid": 2379440, "ts": 6274143173640.211, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274143173679.349, "dur": 46.817, + "args": { + "External id": 506568,"Record function id": 0, "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274143173734.073, "dur": 67.579, + "args": { + "External id": 506569,"Record function id": 0, "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274143173810.549, "dur": 61060.719, + "args": { + "External id": 506570,"Record function id": 0, "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143173907.436, "dur": 9.001, + "args": { + "External id": 506571,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143173926.869, "dur": 7.472, + "args": { + "External id": 506572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143173949.663, "dur": 59981.803, + "args": { + "External id": 506573,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143173965.083, "dur": 59951.540, + "args": { + "External id": 506574,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143174103.978, "dur": 20.742, + "args": { + "External id": 506575,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143174146.856, "dur": 59719.485, + "args": { + "External id": 506576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143174149.929, "dur": 59715.099, + "args": { + "External id": 506577,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143174157.757, "dur": 14.095, + "args": { + "External id": 506578,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143174176.618, "dur": 59682.319, + "args": { + "External id": 506579,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143234045.454, "dur": 43.785, + "args": { + "External id": 506580,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143234049.354, "dur": 39.044, + "args": { + "External id": 506581,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143234124.327, "dur": 400.570, + "args": { + "External id": 506582,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143234159.284, "dur": 360.731, + "args": { + "External id": 506583,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4310, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143234173.650, "dur": 340.536, + "args": { + "External id": 506584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143234551.052, "dur": 2.368, + "args": { + "External id": 506585,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4312, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234646.654, "dur": 8.209, + "args": { + "External id": 506586,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234708.004, "dur": 1.653, + "args": { + "External id": 506587,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234725.706, "dur": 4.046, + "args": { + "External id": 506588,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234740.937, "dur": 0.995, + "args": { + "External id": 506589,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234752.037, "dur": 0.834, + "args": { + "External id": 506590,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234764.132, "dur": 0.963, + "args": { + "External id": 506591,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234777.257, "dur": 3.555, + "args": { + "External id": 506592,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234790.676, "dur": 1.974, + "args": { + "External id": 506593,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143234802.151, "dur": 0.944, + "args": { + "External id": 506594,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143234907.905, "dur": 3239.779, + "args": { + "External id": 506595,"Record function id": 0, "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274143234929.928, "dur": 1271.257, + "args": { + "External id": 506596,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274143234946.916, "dur": 416.665, + "args": { + "External id": 506597,"Record function id": 0, "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235043.295, "dur": 4.719, + "args": { + "External id": 506598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235051.971, "dur": 49.204, + "args": { + "External id": 506599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235106.052, "dur": 3.136, + "args": { + "External id": 506600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235111.184, "dur": 0.630, + "args": { + "External id": 506601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235115.787, "dur": 0.919, + "args": { + "External id": 506602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235118.374, "dur": 0.890, + "args": { + "External id": 506603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235120.960, "dur": 1.975, + "args": { + "External id": 506604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235124.674, "dur": 0.653, + "args": { + "External id": 506605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235128.988, "dur": 0.663, + "args": { + "External id": 506606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143235130.873, "dur": 0.785, + "args": { + "External id": 506607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143235153.567, "dur": 175.023, + "args": { + "External id": 506608,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143235171.817, "dur": 151.344, + "args": { + "External id": 506609,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143235197.181, "dur": 16.067, + "args": { + "External id": 506610,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143235217.054, "dur": 77.135, + "args": { + "External id": 506611,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143235219.908, "dur": 73.988, + "args": { + "External id": 506612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235224.486, "dur": 6.162, + "args": { + "External id": 506613,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143235234.819, "dur": 58.577, + "args": { + "External id": 506614,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338711, "tid": 2379440, + "ts": 6274143235462.093, "dur": 730.642, + "args": { + "External id": 506615,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274143235480.430, "dur": 698.911, + "args": { + "External id": 506616,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143235538.644, "dur": 4.958, + "args": { + "External id": 506617,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143235559.800, "dur": 60.127, + "args": { + "External id": 506618,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235565.240, "dur": 3.166, + "args": { + "External id": 506619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235570.224, "dur": 0.598, + "args": { + "External id": 506620,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235572.493, "dur": 0.713, + "args": { + "External id": 506621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235576.679, "dur": 0.376, + "args": { + "External id": 506622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235578.843, "dur": 0.339, + "args": { + "External id": 506623,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235580.540, "dur": 2.647, + "args": { + "External id": 506624,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235605.649, "dur": 0.761, + "args": { + "External id": 506625,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235609.290, "dur": 0.467, + "args": { + "External id": 506626,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235611.936, "dur": 1.719, + "args": { + "External id": 506627,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143235635.814, "dur": 60.592, + "args": { + "External id": 506628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143235733.818, "dur": 132.879, + "args": { + "External id": 506629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143235745.771, "dur": 5.586, + "args": { + "External id": 506630,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143235756.618, "dur": 11.115, + "args": { + "External id": 506631,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143235761.586, "dur": 5.673, + "args": { + "External id": 506632,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235765.532, "dur": 0.628, + "args": { + "External id": 506633,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143235776.596, "dur": 35.323, + "args": { + "External id": 506634,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235779.873, "dur": 0.304, + "args": { + "External id": 506635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235781.858, "dur": 0.540, + "args": { + "External id": 506636,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235789.506, "dur": 3.732, + "args": { + "External id": 506637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235795.346, "dur": 0.322, + "args": { + "External id": 506638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235797.163, "dur": 0.568, + "args": { + "External id": 506639,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235800.612, "dur": 0.314, + "args": { + "External id": 506640,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235802.468, "dur": 0.423, + "args": { + "External id": 506641,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235804.893, "dur": 0.333, + "args": { + "External id": 506642,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143235807.721, "dur": 0.579, + "args": { + "External id": 506643,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143235825.772, "dur": 32.909, + "args": { + "External id": 506644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143235917.293, "dur": 175.979, + "args": { + "External id": 506645,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143235954.789, "dur": 95.673, + "args": { + "External id": 506646,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4373, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143235965.075, "dur": 80.639, + "args": { + "External id": 506647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143236114.556, "dur": 2.681, + "args": { + "External id": 506648,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4375, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143236209.402, "dur": 1916.030, + "args": { + "External id": 506649,"Sequence number": 5516028, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4376 + } + }, + { + "ph": "f", "id": 206, "pid": 2338711, "tid": 2379440, "ts": 6274143236209.402, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143236327.879, "dur": 113.150, + "args": { + "External id": 506650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143236491.528, "dur": 42.571, + "args": { + "External id": 506651,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143236551.198, "dur": 88.588, + "args": { + "External id": 506652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143236661.697, "dur": 47.607, + "args": { + "External id": 506653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143236716.117, "dur": 35.342, + "args": { + "External id": 506654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143236757.611, "dur": 29.273, + "args": { + "External id": 506655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143236792.732, "dur": 29.651, + "args": { + "External id": 506656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143236854.737, "dur": 28.489, + "args": { + "External id": 506657,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143236904.454, "dur": 31.864, + "args": { + "External id": 506658,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143236958.745, "dur": 19.365, + "args": { + "External id": 506659,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143236992.226, "dur": 14.189, + "args": { + "External id": 506660,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237013.490, "dur": 36.700, + "args": { + "External id": 506661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237090.957, "dur": 39.035, + "args": { + "External id": 506662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143237166.660, "dur": 254.079, + "args": { + "External id": 506663,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143237247.761, "dur": 6.465, + "args": { + "External id": 506664,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143237256.551, "dur": 2.446, + "args": { + "External id": 506665,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143237260.457, "dur": 2.058, + "args": { + "External id": 506666,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143237263.873, "dur": 2.704, + "args": { + "External id": 506667,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143237313.819, "dur": 5.673, + "args": { + "External id": 506668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143237315.941, "dur": 3.317, + "args": { + "External id": 506669,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143237321.438, "dur": 35.708, + "args": { + "External id": 506670,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143237327.070, "dur": 3.949, + "args": { + "External id": 506671,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143237358.658, "dur": 2.127, + "args": { + "External id": 506672,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143237360.084, "dur": 0.628, + "args": { + "External id": 506673,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143237361.975, "dur": 16.392, + "args": { + "External id": 506674,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143237364.226, "dur": 0.446, + "args": { + "External id": 506675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143237458.688, "dur": 28.136, + "args": { + "External id": 506676,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143237506.206, "dur": 19.172, + "args": { + "External id": 506677,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237533.036, "dur": 40.461, + "args": { + "External id": 506678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237579.839, "dur": 72.459, + "args": { + "External id": 506679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237670.936, "dur": 50.531, + "args": { + "External id": 506680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237736.275, "dur": 38.806, + "args": { + "External id": 506681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237783.203, "dur": 30.662, + "args": { + "External id": 506682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143237823.245, "dur": 30.745, + "args": { + "External id": 506683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143237886.284, "dur": 27.119, + "args": { + "External id": 506684,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143237930.413, "dur": 25.357, + "args": { + "External id": 506685,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143237973.320, "dur": 17.667, + "args": { + "External id": 506686,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143238011.447, "dur": 13.630, + "args": { + "External id": 506687,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143238038.226, "dur": 16.197, + "args": { + "External id": 506688,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238172.033, "dur": 15.994, + "args": { + "External id": 506689,"Record function id": 0, "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238175.439, "dur": 11.639, + "args": { + "External id": 506690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238180.095, "dur": 6.084, + "args": { + "External id": 506691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238181.861, "dur": 4.229, + "args": { + "External id": 506692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238191.770, "dur": 4.994, + "args": { + "External id": 506693,"Record function id": 0, "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238193.371, "dur": 2.915, + "args": { + "External id": 506694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238194.478, "dur": 1.347, + "args": { + "External id": 506695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238194.920, "dur": 0.828, + "args": { + "External id": 506696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238200.116, "dur": 9.262, + "args": { + "External id": 506697,"Record function id": 0, "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238201.263, "dur": 7.670, + "args": { + "External id": 506698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238201.752, "dur": 6.741, + "args": { + "External id": 506699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238205.403, "dur": 3.006, + "args": { + "External id": 506700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238212.582, "dur": 4.776, + "args": { + "External id": 506701,"Record function id": 0, "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238213.854, "dur": 3.058, + "args": { + "External id": 506702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238214.940, "dur": 1.549, + "args": { + "External id": 506703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238215.596, "dur": 0.813, + "args": { + "External id": 506704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238220.500, "dur": 3.741, + "args": { + "External id": 506705,"Record function id": 0, "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238221.674, "dur": 2.154, + "args": { + "External id": 506706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238222.373, "dur": 1.045, + "args": { + "External id": 506707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238222.729, "dur": 0.607, + "args": { + "External id": 506708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238227.442, "dur": 4.106, + "args": { + "External id": 506709,"Record function id": 0, "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238228.921, "dur": 2.175, + "args": { + "External id": 506710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238229.389, "dur": 1.255, + "args": { + "External id": 506711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238229.990, "dur": 0.575, + "args": { + "External id": 506712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238234.800, "dur": 5.471, + "args": { + "External id": 506713,"Record function id": 0, "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238235.813, "dur": 4.064, + "args": { + "External id": 506714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238236.271, "dur": 3.149, + "args": { + "External id": 506715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238238.571, "dur": 0.765, + "args": { + "External id": 506716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238243.452, "dur": 4.211, + "args": { + "External id": 506717,"Record function id": 0, "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238244.722, "dur": 2.503, + "args": { + "External id": 506718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238245.271, "dur": 1.529, + "args": { + "External id": 506719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238245.789, "dur": 0.947, + "args": { + "External id": 506720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238250.787, "dur": 3.763, + "args": { + "External id": 506721,"Record function id": 0, "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143238252.155, "dur": 2.004, + "args": { + "External id": 506722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238252.652, "dur": 1.033, + "args": { + "External id": 506723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143238253.023, "dur": 0.575, + "args": { + "External id": 506724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143238258.702, "dur": 63237.311, + "args": { + "External id": 506725,"Record function id": 0, "Sequence number": 5516027, "Fwd thread id": 1, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143238264.008, "dur": 63222.371, + "args": { + "External id": 506726,"Sequence number": 5516027, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4453 + } + }, + { + "ph": "f", "id": 207, "pid": 2338711, "tid": 2379440, "ts": 6274143238264.008, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274143238297.276, "dur": 42.056, + "args": { + "External id": 506727,"Record function id": 0, "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274143238347.673, "dur": 68.551, + "args": { + "External id": 506728,"Record function id": 0, "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274143238422.632, "dur": 63055.734, + "args": { + "External id": 506729,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143238520.860, "dur": 7.736, + "args": { + "External id": 506730,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143238538.792, "dur": 6.686, + "args": { + "External id": 506731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143238564.876, "dur": 61938.114, + "args": { + "External id": 506732,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143238582.205, "dur": 61905.972, + "args": { + "External id": 506733,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143238718.788, "dur": 21.413, + "args": { + "External id": 506734,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143238761.526, "dur": 61677.592, + "args": { + "External id": 506735,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143238764.917, "dur": 61672.813, + "args": { + "External id": 506736,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143238771.752, "dur": 12.259, + "args": { + "External id": 506737,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143238786.095, "dur": 61644.997, + "args": { + "External id": 506738,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143300653.737, "dur": 16.062, + "args": { + "External id": 506739,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143300658.374, "dur": 10.669, + "args": { + "External id": 506740,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143300703.365, "dur": 453.292, + "args": { + "External id": 506741,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143300739.473, "dur": 411.414, + "args": { + "External id": 506742,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4469, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143300754.124, "dur": 389.327, + "args": { + "External id": 506743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143301186.799, "dur": 2.769, + "args": { + "External id": 506744,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4471, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301261.741, "dur": 7.396, + "args": { + "External id": 506745,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301313.995, "dur": 1.665, + "args": { + "External id": 506746,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301330.245, "dur": 3.896, + "args": { + "External id": 506747,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301345.945, "dur": 1.236, + "args": { + "External id": 506748,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301359.115, "dur": 0.971, + "args": { + "External id": 506749,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301373.707, "dur": 0.827, + "args": { + "External id": 506750,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301384.926, "dur": 3.332, + "args": { + "External id": 506751,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301398.565, "dur": 2.373, + "args": { + "External id": 506752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301410.616, "dur": 0.653, + "args": { + "External id": 506753,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143301512.346, "dur": 3221.813, + "args": { + "External id": 506754,"Record function id": 0, "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274143301535.970, "dur": 1200.033, + "args": { + "External id": 506755,"Record function id": 0, "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274143301553.162, "dur": 387.241, + "args": { + "External id": 506756,"Record function id": 0, "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301677.051, "dur": 5.476, + "args": { + "External id": 506757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301686.325, "dur": 1.053, + "args": { + "External id": 506758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301689.096, "dur": 3.381, + "args": { + "External id": 506759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301694.025, "dur": 1.184, + "args": { + "External id": 506760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301696.708, "dur": 0.771, + "args": { + "External id": 506761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301698.542, "dur": 0.704, + "args": { + "External id": 506762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301700.887, "dur": 2.582, + "args": { + "External id": 506763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301706.845, "dur": 0.774, + "args": { + "External id": 506764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301708.814, "dur": 0.722, + "args": { + "External id": 506765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143301710.928, "dur": 0.990, + "args": { + "External id": 506766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143301732.334, "dur": 172.749, + "args": { + "External id": 506767,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143301750.020, "dur": 149.963, + "args": { + "External id": 506768,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143301774.205, "dur": 18.346, + "args": { + "External id": 506769,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143301798.549, "dur": 71.320, + "args": { + "External id": 506770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143301801.399, "dur": 67.971, + "args": { + "External id": 506771,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143301805.991, "dur": 6.296, + "args": { + "External id": 506772,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143301813.862, "dur": 54.863, + "args": { + "External id": 506773,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338711, "tid": 2379440, + "ts": 6274143302028.990, "dur": 698.569, + "args": { + "External id": 506774,"Record function id": 0, "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274143302047.324, "dur": 666.884, + "args": { + "External id": 506775,"Record function id": 0, "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143302149.184, "dur": 7.880, + "args": { + "External id": 506776,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143302174.600, "dur": 38.574, + "args": { + "External id": 506777,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302179.969, "dur": 2.853, + "args": { + "External id": 506778,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302184.964, "dur": 0.534, + "args": { + "External id": 506779,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302190.488, "dur": 0.757, + "args": { + "External id": 506780,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302194.000, "dur": 0.398, + "args": { + "External id": 506781,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302195.796, "dur": 0.475, + "args": { + "External id": 506782,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302197.920, "dur": 2.450, + "args": { + "External id": 506783,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302202.894, "dur": 0.460, + "args": { + "External id": 506784,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302205.150, "dur": 0.251, + "args": { + "External id": 506785,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302206.964, "dur": 1.299, + "args": { + "External id": 506786,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143302224.248, "dur": 50.033, + "args": { + "External id": 506787,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143302308.120, "dur": 115.340, + "args": { + "External id": 506788,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143302319.061, "dur": 4.106, + "args": { + "External id": 506789,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143302328.418, "dur": 10.602, + "args": { + "External id": 506790,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143302333.143, "dur": 5.440, + "args": { + "External id": 506791,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302336.757, "dur": 0.582, + "args": { + "External id": 506792,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143302345.916, "dur": 28.449, + "args": { + "External id": 506793,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302348.428, "dur": 0.475, + "args": { + "External id": 506794,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302350.922, "dur": 0.397, + "args": { + "External id": 506795,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302353.090, "dur": 2.913, + "args": { + "External id": 506796,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302357.366, "dur": 0.435, + "args": { + "External id": 506797,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302359.211, "dur": 0.638, + "args": { + "External id": 506798,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302363.284, "dur": 0.327, + "args": { + "External id": 506799,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302365.145, "dur": 0.362, + "args": { + "External id": 506800,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302366.886, "dur": 0.272, + "args": { + "External id": 506801,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143302370.039, "dur": 0.496, + "args": { + "External id": 506802,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143302385.251, "dur": 30.208, + "args": { + "External id": 506803,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143302468.439, "dur": 146.455, + "args": { + "External id": 506804,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143302498.794, "dur": 111.529, + "args": { + "External id": 506805,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4532, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143302508.894, "dur": 75.727, + "args": { + "External id": 506806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143302641.961, "dur": 3.511, + "args": { + "External id": 506807,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4534, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143302744.722, "dur": 1966.743, + "args": { + "External id": 506808,"Sequence number": 5516026, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4535 + } + }, + { + "ph": "f", "id": 208, "pid": 2338711, "tid": 2379440, "ts": 6274143302744.722, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143302862.531, "dur": 114.108, + "args": { + "External id": 506809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143303024.851, "dur": 86.841, + "args": { + "External id": 506810,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143303152.874, "dur": 62.288, + "args": { + "External id": 506811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143303228.564, "dur": 32.771, + "args": { + "External id": 506812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143303267.745, "dur": 33.066, + "args": { + "External id": 506813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143303307.441, "dur": 29.172, + "args": { + "External id": 506814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143303342.690, "dur": 29.485, + "args": { + "External id": 506815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143303406.075, "dur": 26.992, + "args": { + "External id": 506816,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143303455.414, "dur": 31.001, + "args": { + "External id": 506817,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143303509.200, "dur": 21.235, + "args": { + "External id": 506818,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143303545.773, "dur": 16.249, + "args": { + "External id": 506819,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143303569.777, "dur": 63.136, + "args": { + "External id": 506820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143303640.523, "dur": 46.935, + "args": { + "External id": 506821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143303723.974, "dur": 276.320, + "args": { + "External id": 506822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143303809.983, "dur": 6.942, + "args": { + "External id": 506823,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143303819.111, "dur": 2.537, + "args": { + "External id": 506824,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143303823.171, "dur": 1.700, + "args": { + "External id": 506825,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143303826.516, "dur": 3.200, + "args": { + "External id": 506826,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143303885.948, "dur": 12.163, + "args": { + "External id": 506827,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143303894.702, "dur": 3.194, + "args": { + "External id": 506828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143303900.648, "dur": 33.336, + "args": { + "External id": 506829,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143303906.808, "dur": 3.532, + "args": { + "External id": 506830,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143303935.549, "dur": 2.429, + "args": { + "External id": 506831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143303936.955, "dur": 0.947, + "args": { + "External id": 506832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143303939.226, "dur": 16.388, + "args": { + "External id": 506833,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143303943.172, "dur": 0.610, + "args": { + "External id": 506834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143304040.513, "dur": 74.132, + "args": { + "External id": 506835,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143304139.404, "dur": 18.561, + "args": { + "External id": 506836,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143304180.494, "dur": 63.744, + "args": { + "External id": 506837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143304252.894, "dur": 45.448, + "args": { + "External id": 506838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143304310.888, "dur": 20.673, + "args": { + "External id": 506839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143304337.323, "dur": 34.493, + "args": { + "External id": 506840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143304378.758, "dur": 28.818, + "args": { + "External id": 506841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143304415.144, "dur": 29.724, + "args": { + "External id": 506842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143304469.250, "dur": 25.051, + "args": { + "External id": 506843,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143304512.987, "dur": 23.611, + "args": { + "External id": 506844,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143304553.319, "dur": 17.274, + "args": { + "External id": 506845,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143304610.872, "dur": 22.867, + "args": { + "External id": 506846,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143304654.135, "dur": 22.323, + "args": { + "External id": 506847,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304759.189, "dur": 16.234, + "args": { + "External id": 506848,"Record function id": 0, "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304762.658, "dur": 11.678, + "args": { + "External id": 506849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304767.191, "dur": 6.227, + "args": { + "External id": 506850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304768.870, "dur": 4.462, + "args": { + "External id": 506851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304779.308, "dur": 5.390, + "args": { + "External id": 506852,"Record function id": 0, "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304780.793, "dur": 3.489, + "args": { + "External id": 506853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304781.792, "dur": 2.000, + "args": { + "External id": 506854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304782.730, "dur": 0.974, + "args": { + "External id": 506855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304788.145, "dur": 6.527, + "args": { + "External id": 506856,"Record function id": 0, "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304789.608, "dur": 4.631, + "args": { + "External id": 506857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304790.497, "dur": 3.307, + "args": { + "External id": 506858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304791.261, "dur": 2.436, + "args": { + "External id": 506859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304797.880, "dur": 4.713, + "args": { + "External id": 506860,"Record function id": 0, "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304799.287, "dur": 2.906, + "args": { + "External id": 506861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304800.325, "dur": 1.465, + "args": { + "External id": 506862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304800.648, "dur": 1.071, + "args": { + "External id": 506863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304805.583, "dur": 3.869, + "args": { + "External id": 506864,"Record function id": 0, "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304807.121, "dur": 1.940, + "args": { + "External id": 506865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304807.554, "dur": 1.073, + "args": { + "External id": 506866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304807.834, "dur": 0.705, + "args": { + "External id": 506867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304812.479, "dur": 4.306, + "args": { + "External id": 506868,"Record function id": 0, "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304813.892, "dur": 2.488, + "args": { + "External id": 506869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304814.804, "dur": 1.123, + "args": { + "External id": 506870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304815.314, "dur": 0.536, + "args": { + "External id": 506871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304819.965, "dur": 5.855, + "args": { + "External id": 506872,"Record function id": 0, "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304821.000, "dur": 4.436, + "args": { + "External id": 506873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304821.435, "dur": 3.572, + "args": { + "External id": 506874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304824.331, "dur": 0.573, + "args": { + "External id": 506875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304829.015, "dur": 7.117, + "args": { + "External id": 506876,"Record function id": 0, "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304833.750, "dur": 2.009, + "args": { + "External id": 506877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304834.433, "dur": 0.892, + "args": { + "External id": 506878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304834.710, "dur": 0.522, + "args": { + "External id": 506879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304839.241, "dur": 34.130, + "args": { + "External id": 506880,"Record function id": 0, "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143304870.790, "dur": 2.169, + "args": { + "External id": 506881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304871.346, "dur": 1.176, + "args": { + "External id": 506882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143304871.822, "dur": 0.617, + "args": { + "External id": 506883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143304877.639, "dur": 62588.956, + "args": { + "External id": 506884,"Record function id": 0, "Sequence number": 5516025, "Fwd thread id": 1, "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143304879.437, "dur": 62577.669, + "args": { + "External id": 506885,"Sequence number": 5516025, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4612 + } + }, + { + "ph": "f", "id": 209, "pid": 2338711, "tid": 2379440, "ts": 6274143304879.437, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274143304915.988, "dur": 44.033, + "args": { + "External id": 506886,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274143304968.337, "dur": 76.213, + "args": { + "External id": 506887,"Record function id": 0, "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274143305051.868, "dur": 62396.800, + "args": { + "External id": 506888,"Record function id": 0, "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143305195.530, "dur": 9.379, + "args": { + "External id": 506889,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143305216.795, "dur": 7.714, + "args": { + "External id": 506890,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143305242.982, "dur": 61168.788, + "args": { + "External id": 506891,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143305257.605, "dur": 61138.412, + "args": { + "External id": 506892,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143305348.613, "dur": 18.545, + "args": { + "External id": 506893,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143305387.254, "dur": 60951.976, + "args": { + "External id": 506894,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143305390.328, "dur": 60947.269, + "args": { + "External id": 506895,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143305395.157, "dur": 11.562, + "args": { + "External id": 506896,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143305408.828, "dur": 60922.413, + "args": { + "External id": 506897,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143366533.792, "dur": 14.905, + "args": { + "External id": 506898,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143366537.927, "dur": 10.268, + "args": { + "External id": 506899,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143366583.613, "dur": 538.153, + "args": { + "External id": 506900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143366641.968, "dur": 473.318, + "args": { + "External id": 506901,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4628, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143366659.485, "dur": 448.743, + "args": { + "External id": 506902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143367151.857, "dur": 2.804, + "args": { + "External id": 506903,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4630, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367234.260, "dur": 7.941, + "args": { + "External id": 506904,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367288.622, "dur": 1.265, + "args": { + "External id": 506905,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367305.206, "dur": 3.616, + "args": { + "External id": 506906,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367320.113, "dur": 1.097, + "args": { + "External id": 506907,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367334.180, "dur": 0.779, + "args": { + "External id": 506908,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367344.794, "dur": 0.931, + "args": { + "External id": 506909,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367356.400, "dur": 2.894, + "args": { + "External id": 506910,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367368.673, "dur": 2.623, + "args": { + "External id": 506911,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367380.532, "dur": 0.855, + "args": { + "External id": 506912,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143367481.925, "dur": 3317.219, + "args": { + "External id": 506913,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274143367505.903, "dur": 1265.805, + "args": { + "External id": 506914,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274143367522.048, "dur": 424.963, + "args": { + "External id": 506915,"Record function id": 0, "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367654.882, "dur": 5.967, + "args": { + "External id": 506916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367665.969, "dur": 0.946, + "args": { + "External id": 506917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367669.476, "dur": 3.886, + "args": { + "External id": 506918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367676.096, "dur": 1.327, + "args": { + "External id": 506919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367679.967, "dur": 0.850, + "args": { + "External id": 506920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367682.731, "dur": 0.666, + "args": { + "External id": 506921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367684.927, "dur": 2.058, + "args": { + "External id": 506922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367691.166, "dur": 0.643, + "args": { + "External id": 506923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367693.243, "dur": 0.580, + "args": { + "External id": 506924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143367695.307, "dur": 0.607, + "args": { + "External id": 506925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143367720.780, "dur": 184.657, + "args": { + "External id": 506926,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143367738.150, "dur": 161.837, + "args": { + "External id": 506927,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143367765.130, "dur": 22.990, + "args": { + "External id": 506928,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143367792.323, "dur": 77.195, + "args": { + "External id": 506929,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143367795.072, "dur": 74.097, + "args": { + "External id": 506930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143367799.537, "dur": 5.891, + "args": { + "External id": 506931,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143367807.217, "dur": 61.260, + "args": { + "External id": 506932,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338711, "tid": 2379440, + "ts": 6274143368038.929, "dur": 724.605, + "args": { + "External id": 506933,"Record function id": 0, "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274143368098.875, "dur": 651.511, + "args": { + "External id": 506934,"Record function id": 0, "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143368165.980, "dur": 7.356, + "args": { + "External id": 506935,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143368190.526, "dur": 36.699, + "args": { + "External id": 506936,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368196.145, "dur": 3.137, + "args": { + "External id": 506937,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368201.320, "dur": 0.594, + "args": { + "External id": 506938,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368203.483, "dur": 0.569, + "args": { + "External id": 506939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368207.441, "dur": 0.289, + "args": { + "External id": 506940,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368209.415, "dur": 0.319, + "args": { + "External id": 506941,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368211.070, "dur": 2.351, + "args": { + "External id": 506942,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368216.387, "dur": 0.369, + "args": { + "External id": 506943,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368218.393, "dur": 0.348, + "args": { + "External id": 506944,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368220.347, "dur": 1.809, + "args": { + "External id": 506945,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143368238.890, "dur": 50.269, + "args": { + "External id": 506946,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143368322.197, "dur": 120.412, + "args": { + "External id": 506947,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143368333.173, "dur": 4.187, + "args": { + "External id": 506948,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143368342.901, "dur": 10.934, + "args": { + "External id": 506949,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143368347.536, "dur": 5.859, + "args": { + "External id": 506950,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368351.558, "dur": 0.665, + "args": { + "External id": 506951,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143368361.023, "dur": 27.732, + "args": { + "External id": 506952,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368363.357, "dur": 0.484, + "args": { + "External id": 506953,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368365.511, "dur": 0.264, + "args": { + "External id": 506954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368367.309, "dur": 3.854, + "args": { + "External id": 506955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368372.552, "dur": 0.331, + "args": { + "External id": 506956,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368374.182, "dur": 0.380, + "args": { + "External id": 506957,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368377.541, "dur": 0.587, + "args": { + "External id": 506958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368379.795, "dur": 0.360, + "args": { + "External id": 506959,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368381.475, "dur": 0.366, + "args": { + "External id": 506960,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143368384.863, "dur": 0.236, + "args": { + "External id": 506961,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143368401.519, "dur": 33.546, + "args": { + "External id": 506962,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143368489.333, "dur": 161.257, + "args": { + "External id": 506963,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143368522.176, "dur": 123.501, + "args": { + "External id": 506964,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4691, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143368533.420, "dur": 105.800, + "args": { + "External id": 506965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143368679.251, "dur": 2.725, + "args": { + "External id": 506966,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4693, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143368779.985, "dur": 1995.474, + "args": { + "External id": 506967,"Sequence number": 5516024, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4694 + } + }, + { + "ph": "f", "id": 210, "pid": 2338711, "tid": 2379440, "ts": 6274143368779.985, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143368900.036, "dur": 117.414, + "args": { + "External id": 506968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143369105.422, "dur": 49.933, + "args": { + "External id": 506969,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143369177.090, "dur": 61.927, + "args": { + "External id": 506970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143369252.118, "dur": 33.423, + "args": { + "External id": 506971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143369307.600, "dur": 34.502, + "args": { + "External id": 506972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143369349.303, "dur": 27.918, + "args": { + "External id": 506973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143369386.008, "dur": 30.316, + "args": { + "External id": 506974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143369447.486, "dur": 25.528, + "args": { + "External id": 506975,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143369499.350, "dur": 29.241, + "args": { + "External id": 506976,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143369555.564, "dur": 21.067, + "args": { + "External id": 506977,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143369611.282, "dur": 22.564, + "args": { + "External id": 506978,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143369648.250, "dur": 53.251, + "args": { + "External id": 506979,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143369706.371, "dur": 35.455, + "args": { + "External id": 506980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143369774.253, "dur": 325.881, + "args": { + "External id": 506981,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143369860.940, "dur": 6.976, + "args": { + "External id": 506982,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143369870.359, "dur": 2.432, + "args": { + "External id": 506983,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143369874.587, "dur": 5.224, + "args": { + "External id": 506984,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143369881.460, "dur": 2.276, + "args": { + "External id": 506985,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143369930.691, "dur": 7.882, + "args": { + "External id": 506986,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143369935.036, "dur": 3.321, + "args": { + "External id": 506987,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143369940.634, "dur": 36.334, + "args": { + "External id": 506988,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143369946.454, "dur": 3.783, + "args": { + "External id": 506989,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143369978.441, "dur": 1.593, + "args": { + "External id": 506990,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143369979.260, "dur": 0.668, + "args": { + "External id": 506991,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143369981.101, "dur": 19.121, + "args": { + "External id": 506992,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143369985.675, "dur": 0.606, + "args": { + "External id": 506993,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143370155.279, "dur": 43.223, + "args": { + "External id": 506994,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143370220.591, "dur": 22.501, + "args": { + "External id": 506995,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143370251.412, "dur": 54.280, + "args": { + "External id": 506996,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143370312.122, "dur": 40.371, + "args": { + "External id": 506997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143370363.614, "dur": 21.503, + "args": { + "External id": 506998,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143370391.075, "dur": 31.475, + "args": { + "External id": 506999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143370429.544, "dur": 28.274, + "args": { + "External id": 507000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143370466.299, "dur": 30.429, + "args": { + "External id": 507001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143370518.339, "dur": 23.809, + "args": { + "External id": 507002,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143370560.909, "dur": 40.930, + "args": { + "External id": 507003,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143370626.399, "dur": 24.466, + "args": { + "External id": 507004,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143370677.652, "dur": 19.280, + "args": { + "External id": 507005,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143370714.319, "dur": 22.425, + "args": { + "External id": 507006,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370823.769, "dur": 16.576, + "args": { + "External id": 507007,"Record function id": 0, "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370827.366, "dur": 11.939, + "args": { + "External id": 507008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370832.161, "dur": 6.385, + "args": { + "External id": 507009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370833.871, "dur": 4.590, + "args": { + "External id": 507010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370844.608, "dur": 4.224, + "args": { + "External id": 507011,"Record function id": 0, "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370846.068, "dur": 2.344, + "args": { + "External id": 507012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370846.824, "dur": 1.128, + "args": { + "External id": 507013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370847.143, "dur": 0.718, + "args": { + "External id": 507014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370852.223, "dur": 7.331, + "args": { + "External id": 507015,"Record function id": 0, "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370853.675, "dur": 5.442, + "args": { + "External id": 507016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370854.312, "dur": 4.318, + "args": { + "External id": 507017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370855.183, "dur": 3.314, + "args": { + "External id": 507018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370862.610, "dur": 4.249, + "args": { + "External id": 507019,"Record function id": 0, "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370864.131, "dur": 2.320, + "args": { + "External id": 507020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370864.712, "dur": 1.258, + "args": { + "External id": 507021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370865.081, "dur": 0.815, + "args": { + "External id": 507022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370869.774, "dur": 4.352, + "args": { + "External id": 507023,"Record function id": 0, "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370871.275, "dur": 2.479, + "args": { + "External id": 507024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370872.015, "dur": 1.321, + "args": { + "External id": 507025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370872.506, "dur": 0.754, + "args": { + "External id": 507026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370877.379, "dur": 3.789, + "args": { + "External id": 507027,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370878.737, "dur": 2.035, + "args": { + "External id": 507028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370879.148, "dur": 1.216, + "args": { + "External id": 507029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370879.561, "dur": 0.720, + "args": { + "External id": 507030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370884.761, "dur": 5.868, + "args": { + "External id": 507031,"Record function id": 0, "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370886.003, "dur": 4.227, + "args": { + "External id": 507032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370886.475, "dur": 3.323, + "args": { + "External id": 507033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370888.998, "dur": 0.700, + "args": { + "External id": 507034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370893.909, "dur": 4.051, + "args": { + "External id": 507035,"Record function id": 0, "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370895.411, "dur": 2.162, + "args": { + "External id": 507036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370895.950, "dur": 1.188, + "args": { + "External id": 507037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370896.403, "dur": 0.636, + "args": { + "External id": 507038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370901.088, "dur": 3.498, + "args": { + "External id": 507039,"Record function id": 0, "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143370902.168, "dur": 2.040, + "args": { + "External id": 507040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370902.662, "dur": 1.128, + "args": { + "External id": 507041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143370903.043, "dur": 0.661, + "args": { + "External id": 507042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143370909.148, "dur": 61980.334, + "args": { + "External id": 507043,"Record function id": 0, "Sequence number": 5516023, "Fwd thread id": 1, "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143370911.142, "dur": 61968.495, + "args": { + "External id": 507044,"Sequence number": 5516023, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4771 + } + }, + { + "ph": "f", "id": 211, "pid": 2338711, "tid": 2379440, "ts": 6274143370911.142, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274143370945.352, "dur": 44.076, + "args": { + "External id": 507045,"Record function id": 0, "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274143370997.843, "dur": 114.016, + "args": { + "External id": 507046,"Record function id": 0, "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274143371121.142, "dur": 61749.210, + "args": { + "External id": 507047,"Record function id": 0, "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143371226.880, "dur": 11.548, + "args": { + "External id": 507048,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143371249.411, "dur": 7.420, + "args": { + "External id": 507049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143371278.389, "dur": 60609.580, + "args": { + "External id": 507050,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143371296.523, "dur": 60576.190, + "args": { + "External id": 507051,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143371391.050, "dur": 19.376, + "args": { + "External id": 507052,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143371430.915, "dur": 60392.620, + "args": { + "External id": 507053,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143371434.071, "dur": 60388.090, + "args": { + "External id": 507054,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143371439.448, "dur": 12.427, + "args": { + "External id": 507055,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143371453.984, "dur": 60361.271, + "args": { + "External id": 507056,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143432015.090, "dur": 14.371, + "args": { + "External id": 507057,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143432019.163, "dur": 9.798, + "args": { + "External id": 507058,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143432096.345, "dur": 419.449, + "args": { + "External id": 507059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143432133.455, "dur": 376.981, + "args": { + "External id": 507060,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4787, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143432148.752, "dur": 356.240, + "args": { + "External id": 507061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143432540.128, "dur": 2.552, + "args": { + "External id": 507062,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4789, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432638.776, "dur": 8.725, + "args": { + "External id": 507063,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432701.595, "dur": 1.754, + "args": { + "External id": 507064,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432719.022, "dur": 3.680, + "args": { + "External id": 507065,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432734.753, "dur": 0.848, + "args": { + "External id": 507066,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432751.704, "dur": 0.971, + "args": { + "External id": 507067,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432763.105, "dur": 0.912, + "args": { + "External id": 507068,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432773.777, "dur": 3.435, + "args": { + "External id": 507069,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432786.866, "dur": 2.230, + "args": { + "External id": 507070,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143432798.761, "dur": 0.743, + "args": { + "External id": 507071,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143432907.439, "dur": 3325.898, + "args": { + "External id": 507072,"Record function id": 0, "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274143432931.738, "dur": 1258.047, + "args": { + "External id": 507073,"Record function id": 0, "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274143432949.932, "dur": 412.748, + "args": { + "External id": 507074,"Record function id": 0, "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433045.136, "dur": 4.464, + "args": { + "External id": 507075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433089.940, "dur": 1.444, + "args": { + "External id": 507076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433094.957, "dur": 3.207, + "args": { + "External id": 507077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433100.109, "dur": 0.613, + "args": { + "External id": 507078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433102.280, "dur": 0.978, + "args": { + "External id": 507079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433104.881, "dur": 1.170, + "args": { + "External id": 507080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433107.784, "dur": 2.036, + "args": { + "External id": 507081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433113.227, "dur": 0.648, + "args": { + "External id": 507082,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433115.583, "dur": 0.650, + "args": { + "External id": 507083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143433117.856, "dur": 0.800, + "args": { + "External id": 507084,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143433142.553, "dur": 183.798, + "args": { + "External id": 507085,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143433160.470, "dur": 160.821, + "args": { + "External id": 507086,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143433188.067, "dur": 21.968, + "args": { + "External id": 507087,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143433213.981, "dur": 77.143, + "args": { + "External id": 507088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143433216.841, "dur": 73.924, + "args": { + "External id": 507089,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433221.595, "dur": 6.407, + "args": { + "External id": 507090,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143433229.742, "dur": 60.518, + "args": { + "External id": 507091,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338711, "tid": 2379440, + "ts": 6274143433451.983, "dur": 730.192, + "args": { + "External id": 507092,"Record function id": 0, "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274143433470.600, "dur": 697.038, + "args": { + "External id": 507093,"Record function id": 0, "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143433529.802, "dur": 5.527, + "args": { + "External id": 507094,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143433554.926, "dur": 58.038, + "args": { + "External id": 507095,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433560.983, "dur": 3.084, + "args": { + "External id": 507096,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433566.240, "dur": 0.627, + "args": { + "External id": 507097,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433568.354, "dur": 0.339, + "args": { + "External id": 507098,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433571.408, "dur": 0.416, + "args": { + "External id": 507099,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433573.388, "dur": 0.447, + "args": { + "External id": 507100,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433575.438, "dur": 2.711, + "args": { + "External id": 507101,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433581.028, "dur": 0.465, + "args": { + "External id": 507102,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433583.048, "dur": 0.356, + "args": { + "External id": 507103,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433603.597, "dur": 2.826, + "args": { + "External id": 507104,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143433633.645, "dur": 57.913, + "args": { + "External id": 507105,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143433727.813, "dur": 122.505, + "args": { + "External id": 507106,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143433739.362, "dur": 5.777, + "args": { + "External id": 507107,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143433750.666, "dur": 11.445, + "args": { + "External id": 507108,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143433755.497, "dur": 6.206, + "args": { + "External id": 507109,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433759.643, "dur": 0.644, + "args": { + "External id": 507110,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143433769.668, "dur": 29.152, + "args": { + "External id": 507111,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433772.616, "dur": 0.601, + "args": { + "External id": 507112,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433775.008, "dur": 0.441, + "args": { + "External id": 507113,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433777.069, "dur": 3.961, + "args": { + "External id": 507114,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433782.657, "dur": 0.470, + "args": { + "External id": 507115,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433784.755, "dur": 0.279, + "args": { + "External id": 507116,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433787.633, "dur": 0.440, + "args": { + "External id": 507117,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433789.571, "dur": 0.384, + "args": { + "External id": 507118,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433791.654, "dur": 0.358, + "args": { + "External id": 507119,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143433794.443, "dur": 0.386, + "args": { + "External id": 507120,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143433810.124, "dur": 32.169, + "args": { + "External id": 507121,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143433903.783, "dur": 134.078, + "args": { + "External id": 507122,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143433939.878, "dur": 94.251, + "args": { + "External id": 507123,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4850, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143433951.487, "dur": 78.082, + "args": { + "External id": 507124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143434097.969, "dur": 3.737, + "args": { + "External id": 507125,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4852, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143434198.195, "dur": 2013.169, + "args": { + "External id": 507126,"Sequence number": 5516022, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4853 + } + }, + { + "ph": "f", "id": 212, "pid": 2338711, "tid": 2379440, "ts": 6274143434198.195, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143434318.589, "dur": 116.322, + "args": { + "External id": 507127,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143434504.754, "dur": 41.804, + "args": { + "External id": 507128,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143434567.525, "dur": 87.259, + "args": { + "External id": 507129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143434678.008, "dur": 43.907, + "args": { + "External id": 507130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143434730.010, "dur": 35.237, + "args": { + "External id": 507131,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143434772.607, "dur": 30.482, + "args": { + "External id": 507132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143434812.288, "dur": 29.637, + "args": { + "External id": 507133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143434878.829, "dur": 26.183, + "args": { + "External id": 507134,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143434927.234, "dur": 33.038, + "args": { + "External id": 507135,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143434985.938, "dur": 18.904, + "args": { + "External id": 507136,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143435021.046, "dur": 15.458, + "args": { + "External id": 507137,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435044.519, "dur": 81.018, + "args": { + "External id": 507138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435131.591, "dur": 35.862, + "args": { + "External id": 507139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143435203.912, "dur": 261.815, + "args": { + "External id": 507140,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143435289.273, "dur": 7.282, + "args": { + "External id": 507141,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143435299.171, "dur": 2.649, + "args": { + "External id": 507142,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143435303.466, "dur": 3.553, + "args": { + "External id": 507143,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143435308.433, "dur": 3.413, + "args": { + "External id": 507144,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143435357.759, "dur": 8.194, + "args": { + "External id": 507145,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143435362.342, "dur": 3.403, + "args": { + "External id": 507146,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143435367.888, "dur": 34.551, + "args": { + "External id": 507147,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143435374.238, "dur": 3.686, + "args": { + "External id": 507148,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143435404.149, "dur": 1.657, + "args": { + "External id": 507149,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143435404.978, "dur": 0.779, + "args": { + "External id": 507150,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143435407.025, "dur": 15.863, + "args": { + "External id": 507151,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143435409.163, "dur": 0.542, + "args": { + "External id": 507152,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143435504.933, "dur": 29.173, + "args": { + "External id": 507153,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143435557.662, "dur": 19.121, + "args": { + "External id": 507154,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435584.313, "dur": 87.641, + "args": { + "External id": 507155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435684.303, "dur": 46.171, + "args": { + "External id": 507156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435753.482, "dur": 36.207, + "args": { + "External id": 507157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435803.029, "dur": 39.748, + "args": { + "External id": 507158,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435850.830, "dur": 29.964, + "args": { + "External id": 507159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143435886.951, "dur": 31.995, + "args": { + "External id": 507160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143435953.179, "dur": 27.137, + "args": { + "External id": 507161,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143435997.909, "dur": 26.501, + "args": { + "External id": 507162,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143436042.165, "dur": 54.151, + "args": { + "External id": 507163,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143436123.031, "dur": 18.841, + "args": { + "External id": 507164,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143436158.576, "dur": 19.122, + "args": { + "External id": 507165,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436258.699, "dur": 16.453, + "args": { + "External id": 507166,"Record function id": 0, "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436262.013, "dur": 12.093, + "args": { + "External id": 507167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436267.083, "dur": 6.063, + "args": { + "External id": 507168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436268.516, "dur": 4.537, + "args": { + "External id": 507169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436279.063, "dur": 5.059, + "args": { + "External id": 507170,"Record function id": 0, "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436280.743, "dur": 2.940, + "args": { + "External id": 507171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436281.710, "dur": 1.490, + "args": { + "External id": 507172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436282.377, "dur": 0.716, + "args": { + "External id": 507173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436287.592, "dur": 6.960, + "args": { + "External id": 507174,"Record function id": 0, "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436288.870, "dur": 5.124, + "args": { + "External id": 507175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436289.632, "dur": 3.960, + "args": { + "External id": 507176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436290.200, "dur": 3.288, + "args": { + "External id": 507177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436297.787, "dur": 4.013, + "args": { + "External id": 507178,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436299.107, "dur": 2.289, + "args": { + "External id": 507179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436299.668, "dur": 1.328, + "args": { + "External id": 507180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436300.105, "dur": 0.818, + "args": { + "External id": 507181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436304.914, "dur": 3.898, + "args": { + "External id": 507182,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436306.252, "dur": 2.128, + "args": { + "External id": 507183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436306.754, "dur": 1.195, + "args": { + "External id": 507184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436307.139, "dur": 0.729, + "args": { + "External id": 507185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436311.834, "dur": 4.382, + "args": { + "External id": 507186,"Record function id": 0, "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436313.127, "dur": 2.685, + "args": { + "External id": 507187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436313.997, "dur": 1.422, + "args": { + "External id": 507188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436314.706, "dur": 0.639, + "args": { + "External id": 507189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436319.446, "dur": 4.555, + "args": { + "External id": 507190,"Record function id": 0, "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436320.887, "dur": 2.679, + "args": { + "External id": 507191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436321.598, "dur": 1.563, + "args": { + "External id": 507192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436322.075, "dur": 1.018, + "args": { + "External id": 507193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436327.010, "dur": 4.073, + "args": { + "External id": 507194,"Record function id": 0, "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436328.211, "dur": 2.445, + "args": { + "External id": 507195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436329.138, "dur": 1.030, + "args": { + "External id": 507196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436329.399, "dur": 0.693, + "args": { + "External id": 507197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436334.472, "dur": 6.056, + "args": { + "External id": 507198,"Record function id": 0, "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143436335.910, "dur": 4.210, + "args": { + "External id": 507199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436336.376, "dur": 3.329, + "args": { + "External id": 507200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143436338.966, "dur": 0.661, + "args": { + "External id": 507201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143436344.799, "dur": 62033.778, + "args": { + "External id": 507202,"Record function id": 0, "Sequence number": 5516021, "Fwd thread id": 1, "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143436347.007, "dur": 62021.323, + "args": { + "External id": 507203,"Sequence number": 5516021, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4930 + } + }, + { + "ph": "f", "id": 213, "pid": 2338711, "tid": 2379440, "ts": 6274143436347.007, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274143436381.280, "dur": 43.654, + "args": { + "External id": 507204,"Record function id": 0, "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274143436433.607, "dur": 65.696, + "args": { + "External id": 507205,"Record function id": 0, "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274143436505.954, "dur": 61853.436, + "args": { + "External id": 507206,"Record function id": 0, "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143436630.796, "dur": 10.274, + "args": { + "External id": 507207,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143436656.581, "dur": 7.897, + "args": { + "External id": 507208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143436687.311, "dur": 60688.241, + "args": { + "External id": 507209,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143436705.189, "dur": 60651.318, + "args": { + "External id": 507210,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143436799.270, "dur": 21.229, + "args": { + "External id": 507211,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143436840.890, "dur": 60462.696, + "args": { + "External id": 507212,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143436844.269, "dur": 60458.073, + "args": { + "External id": 507213,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143436851.821, "dur": 12.001, + "args": { + "External id": 507214,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143436866.292, "dur": 60429.461, + "args": { + "External id": 507215,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143497498.162, "dur": 14.054, + "args": { + "External id": 507216,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143497502.373, "dur": 9.397, + "args": { + "External id": 507217,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143497545.122, "dur": 433.701, + "args": { + "External id": 507218,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143497582.071, "dur": 390.556, + "args": { + "External id": 507219,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4946, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143497617.075, "dur": 349.375, + "args": { + "External id": 507220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143498007.693, "dur": 2.622, + "args": { + "External id": 507221,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4948, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498134.666, "dur": 8.062, + "args": { + "External id": 507222,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498191.151, "dur": 1.247, + "args": { + "External id": 507223,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498208.716, "dur": 3.761, + "args": { + "External id": 507224,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498224.280, "dur": 0.827, + "args": { + "External id": 507225,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498236.533, "dur": 0.978, + "args": { + "External id": 507226,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498248.147, "dur": 0.717, + "args": { + "External id": 507227,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498262.044, "dur": 3.300, + "args": { + "External id": 507228,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498275.891, "dur": 2.295, + "args": { + "External id": 507229,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498288.090, "dur": 0.784, + "args": { + "External id": 507230,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143498396.409, "dur": 3334.285, + "args": { + "External id": 507231,"Record function id": 0, "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274143498418.668, "dur": 1262.409, + "args": { + "External id": 507232,"Record function id": 0, "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274143498435.426, "dur": 415.000, + "args": { + "External id": 507233,"Record function id": 0, "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498529.513, "dur": 4.386, + "args": { + "External id": 507234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498537.924, "dur": 0.895, + "args": { + "External id": 507235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498540.770, "dur": 2.941, + "args": { + "External id": 507236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498545.572, "dur": 1.033, + "args": { + "External id": 507237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498550.222, "dur": 1.009, + "args": { + "External id": 507238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498553.434, "dur": 0.551, + "args": { + "External id": 507239,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498555.772, "dur": 2.259, + "args": { + "External id": 507240,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498559.548, "dur": 0.760, + "args": { + "External id": 507241,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498564.085, "dur": 0.771, + "args": { + "External id": 507242,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143498566.637, "dur": 0.558, + "args": { + "External id": 507243,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143498613.824, "dur": 195.857, + "args": { + "External id": 507244,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143498635.249, "dur": 168.134, + "args": { + "External id": 507245,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143498664.996, "dur": 21.753, + "args": { + "External id": 507246,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143498692.248, "dur": 78.246, + "args": { + "External id": 507247,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143498698.280, "dur": 71.824, + "args": { + "External id": 507248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143498703.109, "dur": 7.109, + "args": { + "External id": 507249,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143498711.993, "dur": 57.474, + "args": { + "External id": 507250,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338711, "tid": 2379440, + "ts": 6274143498948.989, "dur": 721.280, + "args": { + "External id": 507251,"Record function id": 0, "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274143498968.093, "dur": 683.491, + "args": { + "External id": 507252,"Record function id": 0, "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143499033.253, "dur": 5.327, + "args": { + "External id": 507253,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143499097.035, "dur": 40.046, + "args": { + "External id": 507254,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499103.560, "dur": 3.865, + "args": { + "External id": 507255,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499109.784, "dur": 0.502, + "args": { + "External id": 507256,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499112.122, "dur": 0.608, + "args": { + "External id": 507257,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499116.309, "dur": 0.427, + "args": { + "External id": 507258,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499118.196, "dur": 0.654, + "args": { + "External id": 507259,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499120.224, "dur": 2.837, + "args": { + "External id": 507260,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499126.465, "dur": 0.333, + "args": { + "External id": 507261,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499128.220, "dur": 0.403, + "args": { + "External id": 507262,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499130.044, "dur": 1.854, + "args": { + "External id": 507263,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143499150.206, "dur": 48.494, + "args": { + "External id": 507264,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143499237.759, "dur": 126.489, + "args": { + "External id": 507265,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143499249.411, "dur": 5.178, + "args": { + "External id": 507266,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143499260.678, "dur": 10.590, + "args": { + "External id": 507267,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143499265.351, "dur": 5.478, + "args": { + "External id": 507268,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499269.177, "dur": 0.490, + "args": { + "External id": 507269,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143499279.084, "dur": 29.603, + "args": { + "External id": 507270,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499282.014, "dur": 0.577, + "args": { + "External id": 507271,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499284.707, "dur": 0.565, + "args": { + "External id": 507272,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499286.946, "dur": 3.827, + "args": { + "External id": 507273,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499292.131, "dur": 0.530, + "args": { + "External id": 507274,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499294.176, "dur": 0.649, + "args": { + "External id": 507275,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499297.570, "dur": 0.296, + "args": { + "External id": 507276,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499299.436, "dur": 0.295, + "args": { + "External id": 507277,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499300.913, "dur": 0.644, + "args": { + "External id": 507278,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143499304.206, "dur": 0.459, + "args": { + "External id": 507279,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143499321.942, "dur": 33.876, + "args": { + "External id": 507280,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143499413.019, "dur": 129.514, + "args": { + "External id": 507281,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143499445.050, "dur": 93.773, + "args": { + "External id": 507282,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5009, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143499455.152, "dur": 79.087, + "args": { + "External id": 507283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143499563.564, "dur": 2.038, + "args": { + "External id": 507284,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5011, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143499691.323, "dur": 2017.034, + "args": { + "External id": 507285,"Sequence number": 5516020, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5012 + } + }, + { + "ph": "f", "id": 214, "pid": 2338711, "tid": 2379440, "ts": 6274143499691.323, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143499817.090, "dur": 128.511, + "args": { + "External id": 507286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143499996.710, "dur": 42.203, + "args": { + "External id": 507287,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143500099.139, "dur": 63.291, + "args": { + "External id": 507288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143500177.415, "dur": 32.764, + "args": { + "External id": 507289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143500217.351, "dur": 35.042, + "args": { + "External id": 507290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143500258.826, "dur": 28.237, + "args": { + "External id": 507291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143500293.829, "dur": 30.106, + "args": { + "External id": 507292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143500354.272, "dur": 28.485, + "args": { + "External id": 507293,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143500413.946, "dur": 30.713, + "args": { + "External id": 507294,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143500469.839, "dur": 21.182, + "args": { + "External id": 507295,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143500505.547, "dur": 17.508, + "args": { + "External id": 507296,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143500530.874, "dur": 36.476, + "args": { + "External id": 507297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143500570.893, "dur": 60.930, + "args": { + "External id": 507298,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143500677.108, "dur": 277.289, + "args": { + "External id": 507299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143500764.848, "dur": 7.794, + "args": { + "External id": 507300,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143500775.413, "dur": 2.289, + "args": { + "External id": 507301,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143500779.539, "dur": 3.310, + "args": { + "External id": 507302,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143500784.530, "dur": 2.276, + "args": { + "External id": 507303,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143500836.203, "dur": 6.346, + "args": { + "External id": 507304,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143500839.016, "dur": 3.295, + "args": { + "External id": 507305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143500847.230, "dur": 38.493, + "args": { + "External id": 507306,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143500854.543, "dur": 3.752, + "args": { + "External id": 507307,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143500887.199, "dur": 1.851, + "args": { + "External id": 507308,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143500888.234, "dur": 0.729, + "args": { + "External id": 507309,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143500890.565, "dur": 19.360, + "args": { + "External id": 507310,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143500895.326, "dur": 0.757, + "args": { + "External id": 507311,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143500995.314, "dur": 49.278, + "args": { + "External id": 507312,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143501114.032, "dur": 24.917, + "args": { + "External id": 507313,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143501151.729, "dur": 62.973, + "args": { + "External id": 507314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143501225.985, "dur": 43.747, + "args": { + "External id": 507315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143501280.720, "dur": 25.410, + "args": { + "External id": 507316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143501311.821, "dur": 32.819, + "args": { + "External id": 507317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143501355.006, "dur": 30.141, + "args": { + "External id": 507318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143501391.724, "dur": 32.957, + "args": { + "External id": 507319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143501446.188, "dur": 27.036, + "args": { + "External id": 507320,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143501493.486, "dur": 24.822, + "args": { + "External id": 507321,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143501537.819, "dur": 19.635, + "args": { + "External id": 507322,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143501574.977, "dur": 42.324, + "args": { + "External id": 507323,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143501647.166, "dur": 25.930, + "args": { + "External id": 507324,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501756.099, "dur": 17.284, + "args": { + "External id": 507325,"Record function id": 0, "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501760.134, "dur": 12.284, + "args": { + "External id": 507326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501764.896, "dur": 6.648, + "args": { + "External id": 507327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501766.891, "dur": 4.562, + "args": { + "External id": 507328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501777.305, "dur": 5.399, + "args": { + "External id": 507329,"Record function id": 0, "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501779.177, "dur": 3.144, + "args": { + "External id": 507330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501780.222, "dur": 1.649, + "args": { + "External id": 507331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501780.753, "dur": 1.019, + "args": { + "External id": 507332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501786.181, "dur": 6.905, + "args": { + "External id": 507333,"Record function id": 0, "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501787.543, "dur": 5.125, + "args": { + "External id": 507334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501788.293, "dur": 3.933, + "args": { + "External id": 507335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501788.883, "dur": 3.239, + "args": { + "External id": 507336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501796.156, "dur": 4.280, + "args": { + "External id": 507337,"Record function id": 0, "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501797.518, "dur": 2.525, + "args": { + "External id": 507338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501798.371, "dur": 1.262, + "args": { + "External id": 507339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501798.864, "dur": 0.674, + "args": { + "External id": 507340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501803.355, "dur": 3.854, + "args": { + "External id": 507341,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501804.643, "dur": 2.148, + "args": { + "External id": 507342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501805.152, "dur": 1.207, + "args": { + "External id": 507343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501805.625, "dur": 0.659, + "args": { + "External id": 507344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501810.127, "dur": 4.579, + "args": { + "External id": 507345,"Record function id": 0, "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501811.268, "dur": 3.022, + "args": { + "External id": 507346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501812.165, "dur": 1.709, + "args": { + "External id": 507347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501812.881, "dur": 0.916, + "args": { + "External id": 507348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501817.664, "dur": 4.033, + "args": { + "External id": 507349,"Record function id": 0, "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501819.246, "dur": 2.038, + "args": { + "External id": 507350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501819.724, "dur": 1.170, + "args": { + "External id": 507351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501820.189, "dur": 0.628, + "args": { + "External id": 507352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501824.573, "dur": 4.319, + "args": { + "External id": 507353,"Record function id": 0, "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501825.787, "dur": 2.710, + "args": { + "External id": 507354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501826.651, "dur": 1.421, + "args": { + "External id": 507355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501827.153, "dur": 0.843, + "args": { + "External id": 507356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501832.152, "dur": 6.182, + "args": { + "External id": 507357,"Record function id": 0, "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143501833.560, "dur": 4.365, + "args": { + "External id": 507358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501834.006, "dur": 3.505, + "args": { + "External id": 507359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143501836.803, "dur": 0.620, + "args": { + "External id": 507360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143501842.422, "dur": 62248.061, + "args": { + "External id": 507361,"Record function id": 0, "Sequence number": 5516019, "Fwd thread id": 1, "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143501844.220, "dur": 62198.248, + "args": { + "External id": 507362,"Sequence number": 5516019, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5089 + } + }, + { + "ph": "f", "id": 215, "pid": 2338711, "tid": 2379440, "ts": 6274143501844.220, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274143501879.177, "dur": 44.880, + "args": { + "External id": 507363,"Record function id": 0, "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274143501933.081, "dur": 70.198, + "args": { + "External id": 507364,"Record function id": 0, "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274143502009.859, "dur": 62023.175, + "args": { + "External id": 507365,"Record function id": 0, "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143502149.983, "dur": 9.331, + "args": { + "External id": 507366,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143502173.456, "dur": 7.684, + "args": { + "External id": 507367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143502198.025, "dur": 60791.066, + "args": { + "External id": 507368,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143502214.037, "dur": 60759.742, + "args": { + "External id": 507369,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143502306.068, "dur": 20.949, + "args": { + "External id": 507370,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143502348.065, "dur": 60572.753, + "args": { + "External id": 507371,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143502351.229, "dur": 60568.213, + "args": { + "External id": 507372,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143502356.635, "dur": 14.960, + "args": { + "External id": 507373,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143502373.736, "dur": 60538.839, + "args": { + "External id": 507374,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143563142.190, "dur": 14.359, + "args": { + "External id": 507375,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143563146.222, "dur": 9.715, + "args": { + "External id": 507376,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143563189.703, "dur": 498.625, + "args": { + "External id": 507377,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143563226.522, "dur": 455.137, + "args": { + "External id": 507378,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5105, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143563241.147, "dur": 432.602, + "args": { + "External id": 507379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143563720.279, "dur": 2.675, + "args": { + "External id": 507380,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5107, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563806.938, "dur": 7.603, + "args": { + "External id": 507381,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563861.622, "dur": 4.760, + "args": { + "External id": 507382,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563882.889, "dur": 3.573, + "args": { + "External id": 507383,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563897.925, "dur": 0.894, + "args": { + "External id": 507384,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563911.325, "dur": 0.795, + "args": { + "External id": 507385,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563923.523, "dur": 0.718, + "args": { + "External id": 507386,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563939.945, "dur": 3.074, + "args": { + "External id": 507387,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563952.734, "dur": 2.466, + "args": { + "External id": 507388,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143563964.288, "dur": 0.695, + "args": { + "External id": 507389,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143564109.047, "dur": 3299.856, + "args": { + "External id": 507390,"Record function id": 0, "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274143564134.045, "dur": 1269.353, + "args": { + "External id": 507391,"Record function id": 0, "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274143564150.876, "dur": 415.076, + "args": { + "External id": 507392,"Record function id": 0, "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564292.314, "dur": 6.197, + "args": { + "External id": 507393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564303.512, "dur": 0.635, + "args": { + "External id": 507394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564305.983, "dur": 2.833, + "args": { + "External id": 507395,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564312.223, "dur": 0.692, + "args": { + "External id": 507396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564314.732, "dur": 0.801, + "args": { + "External id": 507397,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564317.142, "dur": 0.688, + "args": { + "External id": 507398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564319.361, "dur": 2.015, + "args": { + "External id": 507399,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564324.372, "dur": 0.624, + "args": { + "External id": 507400,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564326.678, "dur": 0.846, + "args": { + "External id": 507401,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143564329.066, "dur": 0.490, + "args": { + "External id": 507402,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143564349.864, "dur": 181.354, + "args": { + "External id": 507403,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143564367.708, "dur": 158.840, + "args": { + "External id": 507404,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143564391.291, "dur": 17.723, + "args": { + "External id": 507405,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143564413.123, "dur": 77.829, + "args": { + "External id": 507406,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143564415.910, "dur": 74.724, + "args": { + "External id": 507407,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564420.603, "dur": 5.886, + "args": { + "External id": 507408,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143564428.185, "dur": 61.670, + "args": { + "External id": 507409,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338711, "tid": 2379440, + "ts": 6274143564699.293, "dur": 695.376, + "args": { + "External id": 507410,"Record function id": 0, "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274143564720.282, "dur": 661.838, + "args": { + "External id": 507411,"Record function id": 0, "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143564786.769, "dur": 7.504, + "args": { + "External id": 507412,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143564811.123, "dur": 39.770, + "args": { + "External id": 507413,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564817.576, "dur": 2.120, + "args": { + "External id": 507414,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564822.585, "dur": 2.007, + "args": { + "External id": 507415,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564826.282, "dur": 0.655, + "args": { + "External id": 507416,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564828.376, "dur": 0.637, + "args": { + "External id": 507417,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564832.689, "dur": 0.617, + "args": { + "External id": 507418,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564835.097, "dur": 3.569, + "args": { + "External id": 507419,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564840.346, "dur": 0.379, + "args": { + "External id": 507420,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564843.494, "dur": 0.323, + "args": { + "External id": 507421,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564845.689, "dur": 0.328, + "args": { + "External id": 507422,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143564861.769, "dur": 50.642, + "args": { + "External id": 507423,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143564945.550, "dur": 168.021, + "args": { + "External id": 507424,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143564956.324, "dur": 3.746, + "args": { + "External id": 507425,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143564965.071, "dur": 11.309, + "args": { + "External id": 507426,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143564970.164, "dur": 5.759, + "args": { + "External id": 507427,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564973.986, "dur": 0.669, + "args": { + "External id": 507428,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143564984.011, "dur": 29.917, + "args": { + "External id": 507429,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564986.510, "dur": 0.715, + "args": { + "External id": 507430,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564990.693, "dur": 0.438, + "args": { + "External id": 507431,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564992.713, "dur": 2.944, + "args": { + "External id": 507432,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143564997.246, "dur": 2.004, + "args": { + "External id": 507433,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143565000.955, "dur": 0.340, + "args": { + "External id": 507434,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143565002.984, "dur": 0.551, + "args": { + "External id": 507435,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143565006.251, "dur": 0.564, + "args": { + "External id": 507436,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143565008.222, "dur": 0.303, + "args": { + "External id": 507437,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143565009.867, "dur": 0.551, + "args": { + "External id": 507438,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143565029.588, "dur": 73.421, + "args": { + "External id": 507439,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143565165.425, "dur": 137.480, + "args": { + "External id": 507440,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143565201.890, "dur": 97.003, + "args": { + "External id": 507441,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5168, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143565212.785, "dur": 81.554, + "args": { + "External id": 507442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143565322.540, "dur": 2.016, + "args": { + "External id": 507443,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5170, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143565411.537, "dur": 1973.904, + "args": { + "External id": 507444,"Sequence number": 5516018, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5171 + } + }, + { + "ph": "f", "id": 216, "pid": 2338711, "tid": 2379440, "ts": 6274143565411.537, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143565534.025, "dur": 153.771, + "args": { + "External id": 507445,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143565741.659, "dur": 42.994, + "args": { + "External id": 507446,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143565803.714, "dur": 59.410, + "args": { + "External id": 507447,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143565875.977, "dur": 32.594, + "args": { + "External id": 507448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143565915.458, "dur": 33.982, + "args": { + "External id": 507449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143565956.111, "dur": 27.830, + "args": { + "External id": 507450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143565990.892, "dur": 29.760, + "args": { + "External id": 507451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143566047.261, "dur": 74.639, + "args": { + "External id": 507452,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143566144.531, "dur": 35.743, + "args": { + "External id": 507453,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143566206.760, "dur": 20.664, + "args": { + "External id": 507454,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143566241.979, "dur": 14.275, + "args": { + "External id": 507455,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143566273.624, "dur": 44.666, + "args": { + "External id": 507456,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143566322.177, "dur": 34.364, + "args": { + "External id": 507457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143566390.979, "dur": 343.580, + "args": { + "External id": 507458,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143566478.850, "dur": 6.735, + "args": { + "External id": 507459,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143566487.846, "dur": 2.690, + "args": { + "External id": 507460,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143566491.987, "dur": 3.389, + "args": { + "External id": 507461,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143566496.916, "dur": 1.858, + "args": { + "External id": 507462,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143566543.519, "dur": 5.523, + "args": { + "External id": 507463,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143566545.643, "dur": 3.185, + "args": { + "External id": 507464,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143566551.144, "dur": 55.053, + "args": { + "External id": 507465,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143566556.557, "dur": 5.151, + "args": { + "External id": 507466,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143566610.019, "dur": 5.491, + "args": { + "External id": 507467,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143566614.038, "dur": 1.259, + "args": { + "External id": 507468,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143566617.114, "dur": 50.572, + "args": { + "External id": 507469,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143566636.785, "dur": 1.639, + "args": { + "External id": 507470,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143566785.618, "dur": 30.336, + "args": { + "External id": 507471,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143566838.722, "dur": 16.606, + "args": { + "External id": 507472,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143566863.750, "dur": 58.234, + "args": { + "External id": 507473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143566928.315, "dur": 41.241, + "args": { + "External id": 507474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143566979.882, "dur": 20.034, + "args": { + "External id": 507475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143567005.484, "dur": 32.147, + "args": { + "External id": 507476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143567044.486, "dur": 67.554, + "args": { + "External id": 507477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143567123.487, "dur": 34.661, + "args": { + "External id": 507478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143567181.842, "dur": 27.584, + "args": { + "External id": 507479,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143567229.360, "dur": 24.556, + "args": { + "External id": 507480,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143567271.175, "dur": 20.167, + "args": { + "External id": 507481,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143567307.732, "dur": 13.579, + "args": { + "External id": 507482,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143567337.928, "dur": 17.300, + "args": { + "External id": 507483,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567433.971, "dur": 15.362, + "args": { + "External id": 507484,"Record function id": 0, "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567437.039, "dur": 11.373, + "args": { + "External id": 507485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567441.512, "dur": 5.825, + "args": { + "External id": 507486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567442.996, "dur": 4.253, + "args": { + "External id": 507487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567453.200, "dur": 4.501, + "args": { + "External id": 507488,"Record function id": 0, "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567454.618, "dur": 2.571, + "args": { + "External id": 507489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567455.342, "dur": 1.378, + "args": { + "External id": 507490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567455.864, "dur": 0.783, + "args": { + "External id": 507491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567460.995, "dur": 6.328, + "args": { + "External id": 507492,"Record function id": 0, "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567462.116, "dur": 4.787, + "args": { + "External id": 507493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567462.586, "dur": 3.873, + "args": { + "External id": 507494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567463.233, "dur": 3.118, + "args": { + "External id": 507495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567470.606, "dur": 4.663, + "args": { + "External id": 507496,"Record function id": 0, "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567472.082, "dur": 2.751, + "args": { + "External id": 507497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567472.647, "dur": 1.745, + "args": { + "External id": 507498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567473.342, "dur": 0.967, + "args": { + "External id": 507499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567478.451, "dur": 3.722, + "args": { + "External id": 507500,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567479.847, "dur": 1.942, + "args": { + "External id": 507501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567480.357, "dur": 1.027, + "args": { + "External id": 507502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567480.657, "dur": 0.639, + "args": { + "External id": 507503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567485.150, "dur": 4.357, + "args": { + "External id": 507504,"Record function id": 0, "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567486.303, "dur": 2.807, + "args": { + "External id": 507505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567487.464, "dur": 1.213, + "args": { + "External id": 507506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567488.000, "dur": 0.600, + "args": { + "External id": 507507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567492.644, "dur": 3.745, + "args": { + "External id": 507508,"Record function id": 0, "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567494.087, "dur": 1.918, + "args": { + "External id": 507509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567494.545, "dur": 1.062, + "args": { + "External id": 507510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567494.865, "dur": 0.655, + "args": { + "External id": 507511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567499.386, "dur": 3.802, + "args": { + "External id": 507512,"Record function id": 0, "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567500.704, "dur": 2.015, + "args": { + "External id": 507513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567501.356, "dur": 0.902, + "args": { + "External id": 507514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567501.624, "dur": 0.548, + "args": { + "External id": 507515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567506.789, "dur": 5.320, + "args": { + "External id": 507516,"Record function id": 0, "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143567507.745, "dur": 3.967, + "args": { + "External id": 507517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567508.211, "dur": 3.099, + "args": { + "External id": 507518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143567510.668, "dur": 0.571, + "args": { + "External id": 507519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143567516.696, "dur": 75341.898, + "args": { + "External id": 507520,"Record function id": 0, "Sequence number": 5516017, "Fwd thread id": 1, "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143567518.729, "dur": 75330.685, + "args": { + "External id": 507521,"Sequence number": 5516017, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5248 + } + }, + { + "ph": "f", "id": 217, "pid": 2338711, "tid": 2379440, "ts": 6274143567518.729, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274143567552.751, "dur": 65.127, + "args": { + "External id": 507522,"Record function id": 0, "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274143567630.601, "dur": 78.858, + "args": { + "External id": 507523,"Record function id": 0, "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274143567716.570, "dur": 75123.856, + "args": { + "External id": 507524,"Record function id": 0, "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143567819.978, "dur": 8.680, + "args": { + "External id": 507525,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143567842.620, "dur": 7.374, + "args": { + "External id": 507526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143567865.418, "dur": 74090.995, + "args": { + "External id": 507527,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143567880.443, "dur": 74061.049, + "args": { + "External id": 507528,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143567975.679, "dur": 19.590, + "args": { + "External id": 507529,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143568015.859, "dur": 73872.182, + "args": { + "External id": 507530,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143568019.211, "dur": 73867.567, + "args": { + "External id": 507531,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143568024.872, "dur": 12.117, + "args": { + "External id": 507532,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143568038.995, "dur": 73841.050, + "args": { + "External id": 507533,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143642105.027, "dur": 14.574, + "args": { + "External id": 507534,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143642109.026, "dur": 9.946, + "args": { + "External id": 507535,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143642153.135, "dur": 330.819, + "args": { + "External id": 507536,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143642189.878, "dur": 289.033, + "args": { + "External id": 507537,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5264, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143642205.162, "dur": 267.994, + "args": { + "External id": 507538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143642510.021, "dur": 2.379, + "args": { + "External id": 507539,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5266, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642577.620, "dur": 7.134, + "args": { + "External id": 507540,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642660.467, "dur": 2.702, + "args": { + "External id": 507541,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642685.333, "dur": 4.182, + "args": { + "External id": 507542,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642705.262, "dur": 1.148, + "args": { + "External id": 507543,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642720.747, "dur": 0.906, + "args": { + "External id": 507544,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642732.598, "dur": 0.920, + "args": { + "External id": 507545,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642743.315, "dur": 3.025, + "args": { + "External id": 507546,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642756.363, "dur": 2.160, + "args": { + "External id": 507547,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143642768.974, "dur": 0.682, + "args": { + "External id": 507548,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143642876.657, "dur": 3160.044, + "args": { + "External id": 507549,"Record function id": 0, "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274143642898.662, "dur": 1241.597, + "args": { + "External id": 507550,"Record function id": 0, "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274143642916.043, "dur": 398.734, + "args": { + "External id": 507551,"Record function id": 0, "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643012.757, "dur": 4.385, + "args": { + "External id": 507552,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643020.859, "dur": 0.783, + "args": { + "External id": 507553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643023.678, "dur": 3.049, + "args": { + "External id": 507554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643028.616, "dur": 0.712, + "args": { + "External id": 507555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643031.196, "dur": 0.797, + "args": { + "External id": 507556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643033.411, "dur": 0.708, + "args": { + "External id": 507557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643037.558, "dur": 1.957, + "args": { + "External id": 507558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643041.123, "dur": 0.654, + "args": { + "External id": 507559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643043.218, "dur": 0.732, + "args": { + "External id": 507560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143643045.577, "dur": 0.541, + "args": { + "External id": 507561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143643105.929, "dur": 173.322, + "args": { + "External id": 507562,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143643124.348, "dur": 149.356, + "args": { + "External id": 507563,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143643149.498, "dur": 16.906, + "args": { + "External id": 507564,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143643170.463, "dur": 71.746, + "args": { + "External id": 507565,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143643173.308, "dur": 68.591, + "args": { + "External id": 507566,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643178.070, "dur": 6.185, + "args": { + "External id": 507567,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143643185.985, "dur": 55.217, + "args": { + "External id": 507568,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338711, "tid": 2379440, + "ts": 6274143643405.361, "dur": 726.915, + "args": { + "External id": 507569,"Record function id": 0, "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274143643424.139, "dur": 692.801, + "args": { + "External id": 507570,"Record function id": 0, "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143643485.357, "dur": 5.763, + "args": { + "External id": 507571,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143643507.634, "dur": 41.313, + "args": { + "External id": 507572,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643513.604, "dur": 3.542, + "args": { + "External id": 507573,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643524.467, "dur": 0.569, + "args": { + "External id": 507574,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643526.613, "dur": 0.469, + "args": { + "External id": 507575,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643529.873, "dur": 0.250, + "args": { + "External id": 507576,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643531.548, "dur": 0.325, + "args": { + "External id": 507577,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643533.057, "dur": 4.006, + "args": { + "External id": 507578,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643538.430, "dur": 0.354, + "args": { + "External id": 507579,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643540.315, "dur": 0.318, + "args": { + "External id": 507580,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643543.829, "dur": 0.331, + "args": { + "External id": 507581,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143643559.947, "dur": 73.473, + "args": { + "External id": 507582,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143643679.604, "dur": 144.432, + "args": { + "External id": 507583,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143643694.117, "dur": 6.038, + "args": { + "External id": 507584,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143643706.714, "dur": 14.282, + "args": { + "External id": 507585,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143643711.965, "dur": 8.408, + "args": { + "External id": 507586,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643716.624, "dur": 1.357, + "args": { + "External id": 507587,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143643730.855, "dur": 30.800, + "args": { + "External id": 507588,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643734.051, "dur": 0.502, + "args": { + "External id": 507589,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643736.807, "dur": 0.288, + "args": { + "External id": 507590,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643740.357, "dur": 2.964, + "args": { + "External id": 507591,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643745.277, "dur": 0.344, + "args": { + "External id": 507592,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643747.114, "dur": 0.545, + "args": { + "External id": 507593,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643750.688, "dur": 0.547, + "args": { + "External id": 507594,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643752.575, "dur": 0.386, + "args": { + "External id": 507595,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643754.473, "dur": 1.578, + "args": { + "External id": 507596,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143643757.555, "dur": 0.374, + "args": { + "External id": 507597,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143643778.616, "dur": 36.911, + "args": { + "External id": 507598,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143643874.255, "dur": 128.051, + "args": { + "External id": 507599,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143643907.169, "dur": 91.330, + "args": { + "External id": 507600,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5327, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143643917.455, "dur": 76.545, + "args": { + "External id": 507601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143644021.289, "dur": 2.116, + "args": { + "External id": 507602,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5329, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143644148.951, "dur": 1866.158, + "args": { + "External id": 507603,"Sequence number": 5516016, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5330 + } + }, + { + "ph": "f", "id": 218, "pid": 2338711, "tid": 2379440, "ts": 6274143644148.951, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644268.037, "dur": 115.799, + "args": { + "External id": 507604,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143644430.715, "dur": 41.372, + "args": { + "External id": 507605,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644490.921, "dur": 52.295, + "args": { + "External id": 507606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644556.101, "dur": 52.027, + "args": { + "External id": 507607,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644621.288, "dur": 50.657, + "args": { + "External id": 507608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644680.184, "dur": 35.221, + "args": { + "External id": 507609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644723.583, "dur": 30.587, + "args": { + "External id": 507610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143644787.713, "dur": 27.153, + "args": { + "External id": 507611,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143644836.830, "dur": 28.519, + "args": { + "External id": 507612,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143644890.194, "dur": 20.210, + "args": { + "External id": 507613,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143644926.522, "dur": 14.846, + "args": { + "External id": 507614,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644950.467, "dur": 41.130, + "args": { + "External id": 507615,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143644994.975, "dur": 34.450, + "args": { + "External id": 507616,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143645109.887, "dur": 268.552, + "args": { + "External id": 507617,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143645196.612, "dur": 7.661, + "args": { + "External id": 507618,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143645206.707, "dur": 3.870, + "args": { + "External id": 507619,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143645212.353, "dur": 2.835, + "args": { + "External id": 507620,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143645216.939, "dur": 1.928, + "args": { + "External id": 507621,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143645268.703, "dur": 5.741, + "args": { + "External id": 507622,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143645271.341, "dur": 2.917, + "args": { + "External id": 507623,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143645276.321, "dur": 34.405, + "args": { + "External id": 507624,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143645282.357, "dur": 3.947, + "args": { + "External id": 507625,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143645312.270, "dur": 3.904, + "args": { + "External id": 507626,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143645315.556, "dur": 0.529, + "args": { + "External id": 507627,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143645317.346, "dur": 17.568, + "args": { + "External id": 507628,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143645321.691, "dur": 1.563, + "args": { + "External id": 507629,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143645421.562, "dur": 26.891, + "args": { + "External id": 507630,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143645467.891, "dur": 16.332, + "args": { + "External id": 507631,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143645492.270, "dur": 49.504, + "args": { + "External id": 507632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143645549.477, "dur": 61.117, + "args": { + "External id": 507633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143645629.467, "dur": 34.582, + "args": { + "External id": 507634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143645670.917, "dur": 34.635, + "args": { + "External id": 507635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143645712.502, "dur": 29.897, + "args": { + "External id": 507636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143645749.566, "dur": 31.225, + "args": { + "External id": 507637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143645804.466, "dur": 26.631, + "args": { + "External id": 507638,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143645852.437, "dur": 25.536, + "args": { + "External id": 507639,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143645895.921, "dur": 19.725, + "args": { + "External id": 507640,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143645934.825, "dur": 16.235, + "args": { + "External id": 507641,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143645964.165, "dur": 18.803, + "args": { + "External id": 507642,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646108.555, "dur": 26.602, + "args": { + "External id": 507643,"Record function id": 0, "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646117.585, "dur": 15.646, + "args": { + "External id": 507644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646123.156, "dur": 7.135, + "args": { + "External id": 507645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646124.856, "dur": 5.029, + "args": { + "External id": 507646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646142.249, "dur": 5.355, + "args": { + "External id": 507647,"Record function id": 0, "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646143.949, "dur": 3.256, + "args": { + "External id": 507648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646144.991, "dur": 1.623, + "args": { + "External id": 507649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646145.745, "dur": 0.793, + "args": { + "External id": 507650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646150.842, "dur": 7.262, + "args": { + "External id": 507651,"Record function id": 0, "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646152.322, "dur": 5.367, + "args": { + "External id": 507652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646153.088, "dur": 4.078, + "args": { + "External id": 507653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646153.747, "dur": 3.319, + "args": { + "External id": 507654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646161.334, "dur": 4.649, + "args": { + "External id": 507655,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646162.884, "dur": 2.654, + "args": { + "External id": 507656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646163.361, "dur": 1.774, + "args": { + "External id": 507657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646164.078, "dur": 0.950, + "args": { + "External id": 507658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646169.002, "dur": 4.669, + "args": { + "External id": 507659,"Record function id": 0, "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646170.461, "dur": 2.808, + "args": { + "External id": 507660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646171.255, "dur": 1.317, + "args": { + "External id": 507661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646171.742, "dur": 0.751, + "args": { + "External id": 507662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646176.720, "dur": 6.868, + "args": { + "External id": 507663,"Record function id": 0, "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646178.453, "dur": 4.762, + "args": { + "External id": 507664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646179.367, "dur": 3.432, + "args": { + "External id": 507665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646182.088, "dur": 0.632, + "args": { + "External id": 507666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646186.709, "dur": 3.592, + "args": { + "External id": 507667,"Record function id": 0, "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646187.990, "dur": 1.873, + "args": { + "External id": 507668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646188.570, "dur": 0.895, + "args": { + "External id": 507669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646188.840, "dur": 0.549, + "args": { + "External id": 507670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646193.291, "dur": 4.194, + "args": { + "External id": 507671,"Record function id": 0, "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646194.848, "dur": 2.261, + "args": { + "External id": 507672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646195.281, "dur": 1.413, + "args": { + "External id": 507673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646195.776, "dur": 0.841, + "args": { + "External id": 507674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646200.347, "dur": 4.389, + "args": { + "External id": 507675,"Record function id": 0, "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143646201.888, "dur": 2.403, + "args": { + "External id": 507676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646202.529, "dur": 1.367, + "args": { + "External id": 507677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143646203.059, "dur": 0.752, + "args": { + "External id": 507678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143646208.912, "dur": 78009.414, + "args": { + "External id": 507679,"Record function id": 0, "Sequence number": 5516015, "Fwd thread id": 1, "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143646210.757, "dur": 77998.483, + "args": { + "External id": 507680,"Sequence number": 5516015, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5407 + } + }, + { + "ph": "f", "id": 219, "pid": 2338711, "tid": 2379440, "ts": 6274143646210.757, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274143646254.682, "dur": 48.439, + "args": { + "External id": 507681,"Record function id": 0, "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274143646311.625, "dur": 70.155, + "args": { + "External id": 507682,"Record function id": 0, "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274143646387.956, "dur": 77812.227, + "args": { + "External id": 507683,"Record function id": 0, "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143646488.235, "dur": 8.595, + "args": { + "External id": 507684,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143646506.763, "dur": 6.641, + "args": { + "External id": 507685,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143646529.098, "dur": 76659.861, + "args": { + "External id": 507686,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143646546.109, "dur": 76627.878, + "args": { + "External id": 507687,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143646668.918, "dur": 27.092, + "args": { + "External id": 507688,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143646721.500, "dur": 76399.311, + "args": { + "External id": 507689,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143646727.139, "dur": 76392.421, + "args": { + "External id": 507690,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143646732.986, "dur": 14.200, + "args": { + "External id": 507691,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143646749.514, "dur": 76363.244, + "args": { + "External id": 507692,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143723303.673, "dur": 12.986, + "args": { + "External id": 507693,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143723307.357, "dur": 8.858, + "args": { + "External id": 507694,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143723347.565, "dur": 485.466, + "args": { + "External id": 507695,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143723382.350, "dur": 443.797, + "args": { + "External id": 507696,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5423, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143723395.858, "dur": 423.781, + "args": { + "External id": 507697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143723864.155, "dur": 2.831, + "args": { + "External id": 507698,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5425, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143723940.352, "dur": 7.789, + "args": { + "External id": 507699,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143723994.641, "dur": 1.340, + "args": { + "External id": 507700,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724011.286, "dur": 3.997, + "args": { + "External id": 507701,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724026.965, "dur": 0.960, + "args": { + "External id": 507702,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724040.239, "dur": 1.112, + "args": { + "External id": 507703,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724052.171, "dur": 34.987, + "args": { + "External id": 507704,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724103.857, "dur": 2.905, + "args": { + "External id": 507705,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724117.588, "dur": 2.118, + "args": { + "External id": 507706,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724130.657, "dur": 0.756, + "args": { + "External id": 507707,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143724236.231, "dur": 3387.928, + "args": { + "External id": 507708,"Record function id": 0, "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274143724257.863, "dur": 1183.302, + "args": { + "External id": 507709,"Record function id": 0, "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274143724275.312, "dur": 393.470, + "args": { + "External id": 507710,"Record function id": 0, "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724367.011, "dur": 4.279, + "args": { + "External id": 507711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724374.897, "dur": 0.931, + "args": { + "External id": 507712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724377.496, "dur": 3.297, + "args": { + "External id": 507713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724382.379, "dur": 0.703, + "args": { + "External id": 507714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724384.334, "dur": 0.893, + "args": { + "External id": 507715,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724388.091, "dur": 0.703, + "args": { + "External id": 507716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724390.631, "dur": 1.982, + "args": { + "External id": 507717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724393.953, "dur": 0.668, + "args": { + "External id": 507718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724395.894, "dur": 0.697, + "args": { + "External id": 507719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143724400.071, "dur": 0.651, + "args": { + "External id": 507720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143724419.987, "dur": 203.723, + "args": { + "External id": 507721,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143724445.088, "dur": 171.351, + "args": { + "External id": 507722,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143724465.902, "dur": 17.073, + "args": { + "External id": 507723,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143724486.814, "dur": 72.379, + "args": { + "External id": 507724,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143724489.687, "dur": 69.173, + "args": { + "External id": 507725,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724493.845, "dur": 6.737, + "args": { + "External id": 507726,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143724502.363, "dur": 55.633, + "args": { + "External id": 507727,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338711, "tid": 2379440, + "ts": 6274143724775.518, "dur": 657.931, + "args": { + "External id": 507728,"Record function id": 0, "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274143724794.706, "dur": 626.214, + "args": { + "External id": 507729,"Record function id": 0, "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143724860.558, "dur": 6.900, + "args": { + "External id": 507730,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143724883.640, "dur": 31.263, + "args": { + "External id": 507731,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724889.128, "dur": 2.029, + "args": { + "External id": 507732,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724893.312, "dur": 1.395, + "args": { + "External id": 507733,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724895.974, "dur": 0.562, + "args": { + "External id": 507734,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724897.955, "dur": 0.373, + "args": { + "External id": 507735,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724900.885, "dur": 0.353, + "args": { + "External id": 507736,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724902.288, "dur": 2.654, + "args": { + "External id": 507737,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724905.928, "dur": 0.331, + "args": { + "External id": 507738,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724908.269, "dur": 0.543, + "args": { + "External id": 507739,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143724909.909, "dur": 0.424, + "args": { + "External id": 507740,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143724925.772, "dur": 50.194, + "args": { + "External id": 507741,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274143725010.660, "dur": 156.465, + "args": { + "External id": 507742,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143725021.567, "dur": 3.720, + "args": { + "External id": 507743,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274143725030.336, "dur": 10.318, + "args": { + "External id": 507744,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274143725034.746, "dur": 5.506, + "args": { + "External id": 507745,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725038.579, "dur": 0.517, + "args": { + "External id": 507746,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274143725047.871, "dur": 64.074, + "args": { + "External id": 507747,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725049.866, "dur": 0.466, + "args": { + "External id": 507748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725090.275, "dur": 0.606, + "args": { + "External id": 507749,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725092.856, "dur": 2.405, + "args": { + "External id": 507750,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725096.600, "dur": 1.080, + "args": { + "External id": 507751,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725099.287, "dur": 0.465, + "args": { + "External id": 507752,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725100.998, "dur": 0.474, + "args": { + "External id": 507753,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725104.557, "dur": 0.410, + "args": { + "External id": 507754,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725106.170, "dur": 0.395, + "args": { + "External id": 507755,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143725107.700, "dur": 0.492, + "args": { + "External id": 507756,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143725123.782, "dur": 34.912, + "args": { + "External id": 507757,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143725216.928, "dur": 127.314, + "args": { + "External id": 507758,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143725248.318, "dur": 92.336, + "args": { + "External id": 507759,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5486, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143725258.519, "dur": 77.839, + "args": { + "External id": 507760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143725364.439, "dur": 1.961, + "args": { + "External id": 507761,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5488, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143725448.343, "dur": 2135.459, + "args": { + "External id": 507762,"Sequence number": 5516014, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5489 + } + }, + { + "ph": "f", "id": 220, "pid": 2338711, "tid": 2379440, "ts": 6274143725448.343, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143725564.676, "dur": 150.709, + "args": { + "External id": 507763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143725767.364, "dur": 43.677, + "args": { + "External id": 507764,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143725829.510, "dur": 57.464, + "args": { + "External id": 507765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143725898.893, "dur": 33.487, + "args": { + "External id": 507766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143725938.444, "dur": 33.169, + "args": { + "External id": 507767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143725977.566, "dur": 28.780, + "args": { + "External id": 507768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143726013.472, "dur": 29.554, + "args": { + "External id": 507769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143726112.131, "dur": 30.615, + "args": { + "External id": 507770,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143726163.178, "dur": 29.475, + "args": { + "External id": 507771,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143726217.083, "dur": 18.961, + "args": { + "External id": 507772,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143726251.495, "dur": 17.805, + "args": { + "External id": 507773,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143726281.465, "dur": 94.641, + "args": { + "External id": 507774,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143726380.880, "dur": 65.535, + "args": { + "External id": 507775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143726510.728, "dur": 333.422, + "args": { + "External id": 507776,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143726664.462, "dur": 8.097, + "args": { + "External id": 507777,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143726675.025, "dur": 2.959, + "args": { + "External id": 507778,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143726679.055, "dur": 1.846, + "args": { + "External id": 507779,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143726681.890, "dur": 2.328, + "args": { + "External id": 507780,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143726734.982, "dur": 6.587, + "args": { + "External id": 507781,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143726737.007, "dur": 4.333, + "args": { + "External id": 507782,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143726743.630, "dur": 34.181, + "args": { + "External id": 507783,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143726749.483, "dur": 4.050, + "args": { + "External id": 507784,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143726781.295, "dur": 1.727, + "args": { + "External id": 507785,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143726782.265, "dur": 0.662, + "args": { + "External id": 507786,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143726783.972, "dur": 15.162, + "args": { + "External id": 507787,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143726786.602, "dur": 0.698, + "args": { + "External id": 507788,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143726888.501, "dur": 30.277, + "args": { + "External id": 507789,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143726937.099, "dur": 18.704, + "args": { + "External id": 507790,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143726964.495, "dur": 53.925, + "args": { + "External id": 507791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143727024.758, "dur": 78.505, + "args": { + "External id": 507792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143727118.679, "dur": 25.187, + "args": { + "External id": 507793,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143727149.710, "dur": 36.351, + "args": { + "External id": 507794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143727193.522, "dur": 29.598, + "args": { + "External id": 507795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143727228.901, "dur": 31.989, + "args": { + "External id": 507796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143727301.919, "dur": 66.986, + "args": { + "External id": 507797,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143727421.633, "dur": 25.370, + "args": { + "External id": 507798,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143727466.380, "dur": 19.967, + "args": { + "External id": 507799,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143727505.698, "dur": 13.962, + "args": { + "External id": 507800,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143727535.006, "dur": 16.520, + "args": { + "External id": 507801,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727653.374, "dur": 21.501, + "args": { + "External id": 507802,"Record function id": 0, "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727658.274, "dur": 15.262, + "args": { + "External id": 507803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727663.839, "dur": 8.576, + "args": { + "External id": 507804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727665.881, "dur": 6.324, + "args": { + "External id": 507805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727680.065, "dur": 7.143, + "args": { + "External id": 507806,"Record function id": 0, "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727682.456, "dur": 4.040, + "args": { + "External id": 507807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727683.605, "dur": 2.216, + "args": { + "External id": 507808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727684.529, "dur": 1.131, + "args": { + "External id": 507809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727691.659, "dur": 9.877, + "args": { + "External id": 507810,"Record function id": 0, "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727694.348, "dur": 6.513, + "args": { + "External id": 507811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727695.681, "dur": 4.439, + "args": { + "External id": 507812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727696.848, "dur": 3.124, + "args": { + "External id": 507813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727706.305, "dur": 6.186, + "args": { + "External id": 507814,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727708.472, "dur": 3.454, + "args": { + "External id": 507815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727709.886, "dur": 1.604, + "args": { + "External id": 507816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727710.493, "dur": 0.921, + "args": { + "External id": 507817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727715.866, "dur": 4.311, + "args": { + "External id": 507818,"Record function id": 0, "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727717.283, "dur": 2.533, + "args": { + "External id": 507819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727718.220, "dur": 1.232, + "args": { + "External id": 507820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727718.776, "dur": 0.589, + "args": { + "External id": 507821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727723.191, "dur": 6.646, + "args": { + "External id": 507822,"Record function id": 0, "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727724.636, "dur": 4.763, + "args": { + "External id": 507823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727725.511, "dur": 3.520, + "args": { + "External id": 507824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727727.995, "dur": 0.958, + "args": { + "External id": 507825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727733.139, "dur": 4.275, + "args": { + "External id": 507826,"Record function id": 0, "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727734.565, "dur": 2.450, + "args": { + "External id": 507827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727735.219, "dur": 1.442, + "args": { + "External id": 507828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727735.925, "dur": 0.660, + "args": { + "External id": 507829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727740.318, "dur": 4.130, + "args": { + "External id": 507830,"Record function id": 0, "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727741.888, "dur": 2.157, + "args": { + "External id": 507831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727742.603, "dur": 1.096, + "args": { + "External id": 507832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727743.074, "dur": 0.547, + "args": { + "External id": 507833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727747.838, "dur": 4.305, + "args": { + "External id": 507834,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143727749.401, "dur": 2.364, + "args": { + "External id": 507835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727750.038, "dur": 1.365, + "args": { + "External id": 507836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143727750.782, "dur": 0.543, + "args": { + "External id": 507837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143727756.555, "dur": 79185.269, + "args": { + "External id": 507838,"Record function id": 0, "Sequence number": 5516013, "Fwd thread id": 1, "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143727758.341, "dur": 79173.743, + "args": { + "External id": 507839,"Sequence number": 5516013, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5566 + } + }, + { + "ph": "f", "id": 221, "pid": 2338711, "tid": 2379440, "ts": 6274143727758.341, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274143727796.026, "dur": 42.377, + "args": { + "External id": 507840,"Record function id": 0, "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274143727847.499, "dur": 71.570, + "args": { + "External id": 507841,"Record function id": 0, "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274143727925.266, "dur": 78997.423, + "args": { + "External id": 507842,"Record function id": 0, "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143728024.851, "dur": 8.226, + "args": { + "External id": 507843,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143728043.625, "dur": 7.063, + "args": { + "External id": 507844,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143728104.251, "dur": 77884.315, + "args": { + "External id": 507845,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143728119.873, "dur": 77854.111, + "args": { + "External id": 507846,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143728211.213, "dur": 20.592, + "args": { + "External id": 507847,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143728255.855, "dur": 77664.760, + "args": { + "External id": 507848,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143728261.737, "dur": 77657.657, + "args": { + "External id": 507849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143728266.748, "dur": 14.049, + "args": { + "External id": 507850,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143728282.956, "dur": 77629.858, + "args": { + "External id": 507851,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143806138.756, "dur": 13.981, + "args": { + "External id": 507852,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143806142.548, "dur": 9.670, + "args": { + "External id": 507853,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143806185.872, "dur": 396.817, + "args": { + "External id": 507854,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143806220.997, "dur": 356.477, + "args": { + "External id": 507855,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5582, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143806234.748, "dur": 337.253, + "args": { + "External id": 507856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143806628.611, "dur": 4.295, + "args": { + "External id": 507857,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5584, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806710.369, "dur": 7.645, + "args": { + "External id": 507858,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806765.607, "dur": 1.794, + "args": { + "External id": 507859,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806782.605, "dur": 3.816, + "args": { + "External id": 507860,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806797.549, "dur": 0.816, + "args": { + "External id": 507861,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806808.389, "dur": 0.914, + "args": { + "External id": 507862,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806818.748, "dur": 0.678, + "args": { + "External id": 507863,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806832.668, "dur": 2.793, + "args": { + "External id": 507864,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806845.208, "dur": 2.480, + "args": { + "External id": 507865,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143806856.706, "dur": 0.842, + "args": { + "External id": 507866,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143806959.453, "dur": 2480.624, + "args": { + "External id": 507867,"Record function id": 0, "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274143806982.325, "dur": 515.875, + "args": { + "External id": 507868,"Record function id": 0, "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274143806998.623, "dur": 398.603, + "args": { + "External id": 507869,"Record function id": 0, "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807133.931, "dur": 5.253, + "args": { + "External id": 507870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807143.144, "dur": 0.627, + "args": { + "External id": 507871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807148.033, "dur": 3.382, + "args": { + "External id": 507872,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807152.884, "dur": 0.915, + "args": { + "External id": 507873,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807155.099, "dur": 1.110, + "args": { + "External id": 507874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807158.039, "dur": 0.971, + "args": { + "External id": 507875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807162.639, "dur": 1.983, + "args": { + "External id": 507876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807165.984, "dur": 0.770, + "args": { + "External id": 507877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807168.178, "dur": 0.947, + "args": { + "External id": 507878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143807170.411, "dur": 0.918, + "args": { + "External id": 507879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143807194.094, "dur": 168.816, + "args": { + "External id": 507880,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143807211.934, "dur": 146.005, + "args": { + "External id": 507881,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143807234.028, "dur": 17.063, + "args": { + "External id": 507882,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143807255.019, "dur": 71.929, + "args": { + "External id": 507883,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143807257.880, "dur": 68.744, + "args": { + "External id": 507884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143807262.065, "dur": 7.038, + "args": { + "External id": 507885,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143807270.838, "dur": 54.840, + "args": { + "External id": 507886,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143807506.071, "dur": 1911.086, + "args": { + "External id": 507887,"Sequence number": 5516012, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5614 + } + }, + { + "ph": "f", "id": 222, "pid": 2338711, "tid": 2379440, "ts": 6274143807506.071, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143807649.095, "dur": 118.889, + "args": { + "External id": 507888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143807818.950, "dur": 41.796, + "args": { + "External id": 507889,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274143807880.471, "dur": 50.923, + "args": { + "External id": 507890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143807941.022, "dur": 35.866, + "args": { + "External id": 507891,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143807985.377, "dur": 33.672, + "args": { + "External id": 507892,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143808026.011, "dur": 66.007, + "args": { + "External id": 507893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143808105.817, "dur": 35.069, + "args": { + "External id": 507894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143808169.388, "dur": 27.556, + "args": { + "External id": 507895,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143808216.533, "dur": 29.633, + "args": { + "External id": 507896,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143808270.996, "dur": 19.017, + "args": { + "External id": 507897,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143808304.524, "dur": 15.186, + "args": { + "External id": 507898,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143808329.169, "dur": 37.073, + "args": { + "External id": 507899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143808370.303, "dur": 32.897, + "args": { + "External id": 507900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274143808434.718, "dur": 337.421, + "args": { + "External id": 507901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143808515.593, "dur": 8.086, + "args": { + "External id": 507902,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143808525.970, "dur": 2.405, + "args": { + "External id": 507903,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143808529.812, "dur": 4.477, + "args": { + "External id": 507904,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143808569.956, "dur": 11.706, + "args": { + "External id": 507905,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143808654.814, "dur": 8.474, + "args": { + "External id": 507906,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143808659.033, "dur": 3.712, + "args": { + "External id": 507907,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143808665.612, "dur": 32.619, + "args": { + "External id": 507908,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143808671.916, "dur": 1.647, + "args": { + "External id": 507909,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274143808701.997, "dur": 1.182, + "args": { + "External id": 507910,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143808702.590, "dur": 0.500, + "args": { + "External id": 507911,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274143808704.527, "dur": 15.777, + "args": { + "External id": 507912,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143808707.763, "dur": 0.649, + "args": { + "External id": 507913,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143808817.130, "dur": 30.082, + "args": { + "External id": 507914,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143808866.166, "dur": 15.247, + "args": { + "External id": 507915,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143808892.689, "dur": 52.548, + "args": { + "External id": 507916,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143808952.839, "dur": 40.731, + "args": { + "External id": 507917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143809003.912, "dur": 22.104, + "args": { + "External id": 507918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143809031.203, "dur": 70.457, + "args": { + "External id": 507919,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143809113.210, "dur": 33.008, + "args": { + "External id": 507920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274143809154.155, "dur": 31.446, + "args": { + "External id": 507921,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274143809213.722, "dur": 26.550, + "args": { + "External id": 507922,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143809260.696, "dur": 25.065, + "args": { + "External id": 507923,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143809301.526, "dur": 18.798, + "args": { + "External id": 507924,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274143809338.916, "dur": 16.271, + "args": { + "External id": 507925,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274143809367.896, "dur": 17.633, + "args": { + "External id": 507926,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809464.351, "dur": 15.843, + "args": { + "External id": 507927,"Record function id": 0, "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809467.820, "dur": 11.285, + "args": { + "External id": 507928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809472.178, "dur": 5.838, + "args": { + "External id": 507929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809473.747, "dur": 4.150, + "args": { + "External id": 507930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809484.126, "dur": 7.424, + "args": { + "External id": 507931,"Record function id": 0, "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809485.513, "dur": 5.582, + "args": { + "External id": 507932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809486.329, "dur": 4.250, + "args": { + "External id": 507933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809486.825, "dur": 3.650, + "args": { + "External id": 507934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809494.900, "dur": 4.339, + "args": { + "External id": 507935,"Record function id": 0, "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809496.286, "dur": 2.544, + "args": { + "External id": 507936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809496.877, "dur": 1.487, + "args": { + "External id": 507937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809497.431, "dur": 0.859, + "args": { + "External id": 507938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809502.453, "dur": 4.093, + "args": { + "External id": 507939,"Record function id": 0, "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809504.191, "dur": 1.952, + "args": { + "External id": 507940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809504.710, "dur": 1.024, + "args": { + "External id": 507941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809505.020, "dur": 0.637, + "args": { + "External id": 507942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809509.629, "dur": 4.351, + "args": { + "External id": 507943,"Record function id": 0, "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809511.471, "dur": 2.093, + "args": { + "External id": 507944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809512.070, "dur": 1.090, + "args": { + "External id": 507945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809512.366, "dur": 0.714, + "args": { + "External id": 507946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809516.983, "dur": 5.631, + "args": { + "External id": 507947,"Record function id": 0, "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809518.313, "dur": 3.870, + "args": { + "External id": 507948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809519.013, "dur": 2.760, + "args": { + "External id": 507949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809521.061, "dur": 0.646, + "args": { + "External id": 507950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809526.323, "dur": 3.514, + "args": { + "External id": 507951,"Record function id": 0, "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809527.412, "dur": 2.030, + "args": { + "External id": 507952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809527.869, "dur": 1.168, + "args": { + "External id": 507953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809528.282, "dur": 0.675, + "args": { + "External id": 507954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809533.221, "dur": 4.296, + "args": { + "External id": 507955,"Record function id": 0, "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809534.747, "dur": 2.370, + "args": { + "External id": 507956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809535.292, "dur": 1.431, + "args": { + "External id": 507957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809535.982, "dur": 0.655, + "args": { + "External id": 507958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809541.060, "dur": 5.556, + "args": { + "External id": 507959,"Record function id": 0, "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143809542.237, "dur": 3.978, + "args": { + "External id": 507960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809542.691, "dur": 3.110, + "args": { + "External id": 507961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143809543.077, "dur": 2.645, + "args": { + "External id": 507962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143809551.030, "dur": 76080.112, + "args": { + "External id": 507963,"Record function id": 0, "Sequence number": 5516011, "Fwd thread id": 1, "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143809552.572, "dur": 76067.403, + "args": { + "External id": 507964,"Sequence number": 5516011, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5691 + } + }, + { + "ph": "f", "id": 223, "pid": 2338711, "tid": 2379440, "ts": 6274143809552.572, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274143809604.097, "dur": 50.422, + "args": { + "External id": 507965,"Record function id": 0, "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274143809665.579, "dur": 76.004, + "args": { + "External id": 507966,"Record function id": 0, "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274143809747.947, "dur": 75861.115, + "args": { + "External id": 507967,"Record function id": 0, "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143809846.551, "dur": 8.752, + "args": { + "External id": 507968,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143809866.548, "dur": 5.481, + "args": { + "External id": 507969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143809886.783, "dur": 74785.716, + "args": { + "External id": 507970,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143809901.157, "dur": 74755.706, + "args": { + "External id": 507971,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143809991.371, "dur": 20.190, + "args": { + "External id": 507972,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143810045.874, "dur": 74537.013, + "args": { + "External id": 507973,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143810053.819, "dur": 74527.799, + "args": { + "External id": 507974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143810097.664, "dur": 17.078, + "args": { + "External id": 507975,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143810117.720, "dur": 74458.045, + "args": { + "External id": 507976,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143884789.569, "dur": 12.963, + "args": { + "External id": 507977,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143884793.236, "dur": 8.896, + "args": { + "External id": 507978,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143884835.996, "dur": 444.388, + "args": { + "External id": 507979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143884868.707, "dur": 405.451, + "args": { + "External id": 507980,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5707, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143884883.476, "dur": 384.134, + "args": { + "External id": 507981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143885308.725, "dur": 2.799, + "args": { + "External id": 507982,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5709, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885376.979, "dur": 7.180, + "args": { + "External id": 507983,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885431.144, "dur": 3.594, + "args": { + "External id": 507984,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885450.484, "dur": 1.380, + "args": { + "External id": 507985,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885463.603, "dur": 0.956, + "args": { + "External id": 507986,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885476.319, "dur": 0.946, + "args": { + "External id": 507987,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885488.418, "dur": 2.890, + "args": { + "External id": 507988,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885500.837, "dur": 0.978, + "args": { + "External id": 507989,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885513.831, "dur": 2.188, + "args": { + "External id": 507990,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143885525.368, "dur": 0.640, + "args": { + "External id": 507991,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143885652.226, "dur": 312.492, + "args": { + "External id": 507992,"Record function id": 0, "Sequence number": 5516010, "Fwd thread id": 1, "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274143885655.860, "dur": 298.984, + "args": { + "External id": 507993,"Sequence number": 5516010, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5720 + } + }, + { + "ph": "f", "id": 224, "pid": 2338711, "tid": 2379440, "ts": 6274143885655.860, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338711, "tid": 2379440, + "ts": 6274143885789.237, "dur": 54.953, + "args": { + "External id": 507994,"kernel_hash": "cqs2myue7aybil6hnmkybvk3zq47ei4vcoyzevvtnugjbrc4xs4z", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqs2myue7aybil6hnmkybvk3zq47ei4vcoyzevvtnugjbrc4xs4z.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338711, "tid": 2379440, + "ts": 6274143885861.999, "dur": 29.785, + "args": { + "External id": 507995,"kernel_hash": "c6cmd6xpiovdtzbdwl3f2ujlfsh5jockr6d4eebntgc5ux3biyal", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6c/c6cmd6xpiovdtzbdwl3f2ujlfsh5jockr6d4eebntgc5ux3biyal.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338711, "tid": 2379440, + "ts": 6274143885912.789, "dur": 24.453, + "args": { + "External id": 507996,"kernel_hash": "ccof7kftxamzmzcppmoshsaaubnjbbmiyvgppcoqslpagutqs5jr", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/co/ccof7kftxamzmzcppmoshsaaubnjbbmiyvgppcoqslpagutqs5jr.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143885975.479, "dur": 15.102, + "args": { + "External id": 507997,"Record function id": 0, "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274143885979.077, "dur": 10.567, + "args": { + "External id": 507998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143885982.860, "dur": 5.797, + "args": { + "External id": 507999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274143885984.530, "dur": 4.031, + "args": { + "External id": 508000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338711, "tid": 2379440, + "ts": 6274143886014.033, "dur": 9761.029, + "args": { + "External id": 508001,"Record function id": 0, "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338711, "tid": 2379440, + "ts": 6274143886035.743, "dur": 80.793, + "args": { + "External id": 508002,"Record function id": 0, "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338711, "tid": 2379440, + "ts": 6274143886124.877, "dur": 226.758, + "args": { + "External id": 508003,"Record function id": 0, "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338711, "tid": 2379440, + "ts": 6274143886358.008, "dur": 9108.876, + "args": { + "External id": 508004,"Record function id": 0, "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143886488.320, "dur": 8.820, + "args": { + "External id": 508005,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274143886508.598, "dur": 5.524, + "args": { + "External id": 508006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143886537.558, "dur": 7510.876, + "args": { + "External id": 508007,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274143886556.520, "dur": 7475.468, + "args": { + "External id": 508008,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143887421.588, "dur": 25.264, + "args": { + "External id": 508009,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274143887803.764, "dur": 6173.829, + "args": { + "External id": 508010,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274143887807.810, "dur": 6168.452, + "args": { + "External id": 508011,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143887815.222, "dur": 20.501, + "args": { + "External id": 508012,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274143887839.216, "dur": 6129.710, + "args": { + "External id": 508013,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143894241.350, "dur": 13.176, + "args": { + "External id": 508014,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274143894245.279, "dur": 8.718, + "args": { + "External id": 508015,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274143894287.150, "dur": 439.642, + "args": { + "External id": 508016,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143894319.630, "dur": 401.120, + "args": { + "External id": 508017,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5744, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274143894333.477, "dur": 380.885, + "args": { + "External id": 508018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274143894755.020, "dur": 2.786, + "args": { + "External id": 508019,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5746, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894826.277, "dur": 7.813, + "args": { + "External id": 508020,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894884.053, "dur": 0.996, + "args": { + "External id": 508021,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894900.347, "dur": 1.409, + "args": { + "External id": 508022,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894914.160, "dur": 0.749, + "args": { + "External id": 508023,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894926.771, "dur": 0.870, + "args": { + "External id": 508024,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894941.961, "dur": 0.719, + "args": { + "External id": 508025,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894954.104, "dur": 2.241, + "args": { + "External id": 508026,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894965.478, "dur": 0.852, + "args": { + "External id": 508027,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894976.240, "dur": 0.651, + "args": { + "External id": 508028,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143894989.054, "dur": 0.821, + "args": { + "External id": 508029,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895000.926, "dur": 0.786, + "args": { + "External id": 508030,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895014.057, "dur": 0.711, + "args": { + "External id": 508031,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895025.846, "dur": 0.509, + "args": { + "External id": 508032,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895037.242, "dur": 0.653, + "args": { + "External id": 508033,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895047.174, "dur": 1.591, + "args": { + "External id": 508034,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895109.164, "dur": 1.377, + "args": { + "External id": 508035,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895125.559, "dur": 0.916, + "args": { + "External id": 508036,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895138.006, "dur": 0.665, + "args": { + "External id": 508037,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895148.452, "dur": 0.709, + "args": { + "External id": 508038,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895158.947, "dur": 0.682, + "args": { + "External id": 508039,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895170.758, "dur": 2.375, + "args": { + "External id": 508040,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895186.683, "dur": 0.893, + "args": { + "External id": 508041,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895197.718, "dur": 1.069, + "args": { + "External id": 508042,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895220.397, "dur": 1.191, + "args": { + "External id": 508043,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895232.963, "dur": 0.901, + "args": { + "External id": 508044,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895245.634, "dur": 0.816, + "args": { + "External id": 508045,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895258.590, "dur": 0.985, + "args": { + "External id": 508046,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895268.940, "dur": 0.716, + "args": { + "External id": 508047,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895278.599, "dur": 2.358, + "args": { + "External id": 508048,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895293.176, "dur": 0.738, + "args": { + "External id": 508049,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895304.646, "dur": 0.672, + "args": { + "External id": 508050,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895317.534, "dur": 0.791, + "args": { + "External id": 508051,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895327.188, "dur": 0.870, + "args": { + "External id": 508052,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895339.557, "dur": 0.772, + "args": { + "External id": 508053,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895349.467, "dur": 1.724, + "args": { + "External id": 508054,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895359.954, "dur": 0.714, + "args": { + "External id": 508055,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895372.418, "dur": 1.047, + "args": { + "External id": 508056,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895385.069, "dur": 0.950, + "args": { + "External id": 508057,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274143895395.083, "dur": 0.901, + "args": { + "External id": 508058,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145051271.348, "dur": 102.881, + "args": { + "External id": 508059,"Record function id": 0, "Sequence number": 5516465, "Fwd thread id": 1, "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145051280.295, "dur": 84.550, + "args": { + "External id": 508060,"Sequence number": 5516465, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5787 + } + }, + { + "ph": "f", "id": 225, "pid": 2338711, "tid": 2379440, "ts": 6274145051280.295, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2379440, + "ts": 6274145051287.880, "dur": 75.568, + "args": { + "External id": 508061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145051382.774, "dur": 250.028, + "args": { + "External id": 508062,"Record function id": 0, "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145051455.732, "dur": 85.677, + "args": { + "External id": 508063,"Record function id": 0, "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338711, "tid": 2379440, + "ts": 6274145051485.148, "dur": 45.222, + "args": { + "External id": 508064,"Record function id": 0, "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145051546.501, "dur": 1.816, + "args": { + "External id": 508065,"Sequence number": 5516464, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5792 + } + }, + { + "ph": "f", "id": 226, "pid": 2338711, "tid": 2379440, "ts": 6274145051546.501, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145051552.431, "dur": 73.290, + "args": { + "External id": 508066,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145051557.319, "dur": 67.305, + "args": { + "External id": 508067,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145051568.294, "dur": 2.875, + "args": { + "External id": 508068,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145051647.458, "dur": 41282.097, + "args": { + "External id": 508069,"Record function id": 0, "Sequence number": 5516462, "Fwd thread id": 1, "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145051650.549, "dur": 41261.932, + "args": { + "External id": 508070,"Sequence number": 5516462, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5797 + } + }, + { + "ph": "f", "id": 227, "pid": 2338711, "tid": 2379440, "ts": 6274145051650.549, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145051695.076, "dur": 4.457, + "args": { + "External id": 508071,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145051704.538, "dur": 40862.521, + "args": { + "External id": 508072,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145051706.691, "dur": 40859.748, + "args": { + "External id": 508073,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145051710.390, "dur": 7.733, + "args": { + "External id": 508074,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145051719.851, "dur": 40843.127, + "args": { + "External id": 508075,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274145092575.078, "dur": 0.901, + "args": { + "External id": 508076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145092578.664, "dur": 3.618, + "args": { + "External id": 508077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145092580.639, "dur": 1.374, + "args": { + "External id": 508078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274145092605.534, "dur": 51.438, + "args": { + "External id": 508079,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274145092668.954, "dur": 56.202, + "args": { + "External id": 508080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274145092671.039, "dur": 53.928, + "args": { + "External id": 508081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274145092673.066, "dur": 51.531, + "args": { + "External id": 508082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145092951.110, "dur": 20.692, + "args": { + "External id": 508083,"Record function id": 0, "Sequence number": 5516461, "Fwd thread id": 1, "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145092953.728, "dur": 15.168, + "args": { + "External id": 508084,"Sequence number": 5516461, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5811 + } + }, + { + "ph": "f", "id": 228, "pid": 2338711, "tid": 2379440, "ts": 6274145092953.728, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145092959.656, "dur": 8.932, + "args": { + "External id": 508085,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145092961.999, "dur": 6.391, + "args": { + "External id": 508086,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145092976.153, "dur": 152.923, + "args": { + "External id": 508087,"Record function id": 0, "Sequence number": 5516460, "Fwd thread id": 1, "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145092977.068, "dur": 142.291, + "args": { + "External id": 508088,"Sequence number": 5516460, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5815 + } + }, + { + "ph": "f", "id": 229, "pid": 2338711, "tid": 2379440, "ts": 6274145092977.068, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145092980.982, "dur": 137.686, + "args": { + "External id": 508089,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145092987.617, "dur": 40.633, + "args": { + "External id": 508090,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145092992.840, "dur": 6.730, + "args": { + "External id": 508091,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093001.260, "dur": 26.702, + "args": { + "External id": 508092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093005.451, "dur": 21.932, + "args": { + "External id": 508093,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145093030.802, "dur": 6.322, + "args": { + "External id": 508094,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145093035.035, "dur": 1.663, + "args": { + "External id": 508095,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093039.042, "dur": 78.262, + "args": { + "External id": 508096,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093136.423, "dur": 66.691, + "args": { + "External id": 508097,"Record function id": 0, "Sequence number": 5516459, "Fwd thread id": 1, "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093137.748, "dur": 62.400, + "args": { + "External id": 508098,"Sequence number": 5516459, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5825 + } + }, + { + "ph": "f", "id": 230, "pid": 2338711, "tid": 2379440, "ts": 6274145093137.748, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145093141.668, "dur": 58.109, + "args": { + "External id": 508099,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145093145.317, "dur": 26.308, + "args": { + "External id": 508100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145093146.881, "dur": 3.618, + "args": { + "External id": 508101,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093151.555, "dur": 19.821, + "args": { + "External id": 508102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093155.862, "dur": 14.948, + "args": { + "External id": 508103,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145093173.217, "dur": 6.921, + "args": { + "External id": 508104,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145093178.304, "dur": 1.177, + "args": { + "External id": 508105,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093181.318, "dur": 17.946, + "args": { + "External id": 508106,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093207.152, "dur": 214.102, + "args": { + "External id": 508107,"Record function id": 0, "Sequence number": 5516458, "Fwd thread id": 1, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093208.524, "dur": 208.721, + "args": { + "External id": 508108,"Sequence number": 5516458, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5835 + } + }, + { + "ph": "f", "id": 231, "pid": 2338711, "tid": 2379440, "ts": 6274145093208.524, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145093211.192, "dur": 205.468, + "args": { + "External id": 508109,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145093213.004, "dur": 18.099, + "args": { + "External id": 508110,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145093214.277, "dur": 2.594, + "args": { + "External id": 508111,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093217.915, "dur": 12.942, + "args": { + "External id": 508112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093220.108, "dur": 10.341, + "args": { + "External id": 508113,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145093234.800, "dur": 3.856, + "args": { + "External id": 508114,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145093237.383, "dur": 0.993, + "args": { + "External id": 508115,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093239.724, "dur": 175.740, + "args": { + "External id": 508116,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093425.681, "dur": 99.977, + "args": { + "External id": 508117,"Record function id": 0, "Sequence number": 5516457, "Fwd thread id": 1, "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093426.898, "dur": 94.386, + "args": { + "External id": 508118,"Sequence number": 5516457, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5845 + } + }, + { + "ph": "f", "id": 232, "pid": 2338711, "tid": 2379440, "ts": 6274145093426.898, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145093428.971, "dur": 91.935, + "args": { + "External id": 508119,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145093430.776, "dur": 18.069, + "args": { + "External id": 508120,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145093431.896, "dur": 2.228, + "args": { + "External id": 508121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093434.968, "dur": 13.619, + "args": { + "External id": 508122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093436.348, "dur": 11.890, + "args": { + "External id": 508123,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145093449.772, "dur": 5.334, + "args": { + "External id": 508124,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145093451.966, "dur": 2.900, + "args": { + "External id": 508125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093466.333, "dur": 53.424, + "args": { + "External id": 508126,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093530.658, "dur": 38.842, + "args": { + "External id": 508127,"Record function id": 0, "Sequence number": 5516456, "Fwd thread id": 1, "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145093532.281, "dur": 0.948, + "args": { + "External id": 508128,"Sequence number": 5516456, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5855 + } + }, + { + "ph": "f", "id": 233, "pid": 2338711, "tid": 2379440, "ts": 6274145093532.281, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145093536.284, "dur": 29.903, + "args": { + "External id": 508129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145093538.910, "dur": 26.843, + "args": { + "External id": 508130,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145093545.566, "dur": 0.762, + "args": { + "External id": 508131,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145093575.517, "dur": 2543.685, + "args": { + "External id": 508132,"Record function id": 0, "Sequence number": 5516454, "Fwd thread id": 1, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145093577.681, "dur": 2467.639, + "args": { + "External id": 508133,"Sequence number": 5516454, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5860 + } + }, + { + "ph": "f", "id": 234, "pid": 2338711, "tid": 2379440, "ts": 6274145093577.681, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145093643.647, "dur": 4.417, + "args": { + "External id": 508134,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145093652.291, "dur": 2142.487, + "args": { + "External id": 508135,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145093654.881, "dur": 2139.587, + "args": { + "External id": 508136,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145093659.704, "dur": 6.281, + "args": { + "External id": 508137,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145093667.788, "dur": 2124.993, + "args": { + "External id": 508138,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274145095799.586, "dur": 0.419, + "args": { + "External id": 508139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145095804.809, "dur": 3.689, + "args": { + "External id": 508140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145095806.696, "dur": 1.459, + "args": { + "External id": 508141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274145095814.206, "dur": 27.897, + "args": { + "External id": 508142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274145095849.372, "dur": 44.037, + "args": { + "External id": 508143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274145095851.066, "dur": 42.094, + "args": { + "External id": 508144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274145095852.791, "dur": 40.109, + "args": { + "External id": 508145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096088.211, "dur": 25.497, + "args": { + "External id": 508146,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096134.870, "dur": 20.044, + "args": { + "External id": 508147,"Record function id": 0, "Sequence number": 5516453, "Fwd thread id": 1, "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096136.685, "dur": 15.647, + "args": { + "External id": 508148,"Sequence number": 5516453, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5875 + } + }, + { + "ph": "f", "id": 235, "pid": 2338711, "tid": 2379440, "ts": 6274145096136.685, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145096141.702, "dur": 10.355, + "args": { + "External id": 508149,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145096146.334, "dur": 5.478, + "args": { + "External id": 508150,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096158.445, "dur": 88.376, + "args": { + "External id": 508151,"Record function id": 0, "Sequence number": 5516452, "Fwd thread id": 1, "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096159.325, "dur": 82.175, + "args": { + "External id": 508152,"Sequence number": 5516452, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5879 + } + }, + { + "ph": "f", "id": 236, "pid": 2338711, "tid": 2379440, "ts": 6274145096159.325, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145096162.247, "dur": 78.878, + "args": { + "External id": 508153,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145096165.855, "dur": 31.180, + "args": { + "External id": 508154,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145096168.807, "dur": 4.666, + "args": { + "External id": 508155,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096174.967, "dur": 21.807, + "args": { + "External id": 508156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096177.742, "dur": 18.493, + "args": { + "External id": 508157,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145096198.989, "dur": 5.936, + "args": { + "External id": 508158,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145096203.221, "dur": 1.346, + "args": { + "External id": 508159,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096206.349, "dur": 33.909, + "args": { + "External id": 508160,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096251.234, "dur": 60.761, + "args": { + "External id": 508161,"Record function id": 0, "Sequence number": 5516451, "Fwd thread id": 1, "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096254.828, "dur": 54.413, + "args": { + "External id": 508162,"Sequence number": 5516451, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5889 + } + }, + { + "ph": "f", "id": 237, "pid": 2338711, "tid": 2379440, "ts": 6274145096254.828, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145096257.422, "dur": 51.565, + "args": { + "External id": 508163,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145096259.689, "dur": 18.912, + "args": { + "External id": 508164,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145096261.415, "dur": 2.707, + "args": { + "External id": 508165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096265.101, "dur": 13.243, + "args": { + "External id": 508166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096266.038, "dur": 11.883, + "args": { + "External id": 508167,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145096285.971, "dur": 6.064, + "args": { + "External id": 508168,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145096290.549, "dur": 0.892, + "args": { + "External id": 508169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096292.929, "dur": 15.381, + "args": { + "External id": 508170,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096316.141, "dur": 144.691, + "args": { + "External id": 508171,"Record function id": 0, "Sequence number": 5516450, "Fwd thread id": 1, "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096319.622, "dur": 137.335, + "args": { + "External id": 508172,"Sequence number": 5516450, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5899 + } + }, + { + "ph": "f", "id": 238, "pid": 2338711, "tid": 2379440, "ts": 6274145096319.622, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145096321.867, "dur": 134.649, + "args": { + "External id": 508173,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145096323.302, "dur": 17.227, + "args": { + "External id": 508174,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145096324.573, "dur": 2.249, + "args": { + "External id": 508175,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096327.769, "dur": 12.507, + "args": { + "External id": 508176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096328.461, "dur": 11.490, + "args": { + "External id": 508177,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145096341.572, "dur": 5.644, + "args": { + "External id": 508178,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145096343.209, "dur": 3.717, + "args": { + "External id": 508179,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096348.116, "dur": 107.571, + "args": { + "External id": 508180,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096466.212, "dur": 106.051, + "args": { + "External id": 508181,"Record function id": 0, "Sequence number": 5516449, "Fwd thread id": 1, "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096467.221, "dur": 86.060, + "args": { + "External id": 508182,"Sequence number": 5516449, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5909 + } + }, + { + "ph": "f", "id": 239, "pid": 2338711, "tid": 2379440, "ts": 6274145096467.221, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145096469.782, "dur": 83.097, + "args": { + "External id": 508183,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145096474.045, "dur": 17.028, + "args": { + "External id": 508184,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145096475.401, "dur": 2.059, + "args": { + "External id": 508185,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096478.383, "dur": 12.427, + "args": { + "External id": 508186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096479.529, "dur": 10.858, + "args": { + "External id": 508187,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145096492.224, "dur": 2.906, + "args": { + "External id": 508188,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145096494.237, "dur": 0.661, + "args": { + "External id": 508189,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096496.176, "dur": 55.976, + "args": { + "External id": 508190,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096557.484, "dur": 13.186, + "args": { + "External id": 508191,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096577.287, "dur": 68.001, + "args": { + "External id": 508192,"Record function id": 0, "Sequence number": 5516448, "Fwd thread id": 1, "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145096578.711, "dur": 1.146, + "args": { + "External id": 508193,"Sequence number": 5516448, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5920 + } + }, + { + "ph": "f", "id": 240, "pid": 2338711, "tid": 2379440, "ts": 6274145096578.711, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145096582.517, "dur": 58.304, + "args": { + "External id": 508194,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145096603.225, "dur": 36.911, + "args": { + "External id": 508195,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145096613.368, "dur": 1.379, + "args": { + "External id": 508196,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145096654.393, "dur": 3526.367, + "args": { + "External id": 508197,"Record function id": 0, "Sequence number": 5516446, "Fwd thread id": 1, "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145096657.271, "dur": 3483.220, + "args": { + "External id": 508198,"Sequence number": 5516446, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5925 + } + }, + { + "ph": "f", "id": 241, "pid": 2338711, "tid": 2379440, "ts": 6274145096657.271, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145096700.862, "dur": 3.113, + "args": { + "External id": 508199,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145096706.550, "dur": 3161.378, + "args": { + "External id": 508200,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145096708.872, "dur": 3158.752, + "args": { + "External id": 508201,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145096712.472, "dur": 4.740, + "args": { + "External id": 508202,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145096718.119, "dur": 3148.003, + "args": { + "External id": 508203,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274145099872.571, "dur": 0.629, + "args": { + "External id": 508204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145099875.155, "dur": 3.292, + "args": { + "External id": 508205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145099877.138, "dur": 1.115, + "args": { + "External id": 508206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274145099884.193, "dur": 25.203, + "args": { + "External id": 508207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274145099916.156, "dur": 49.850, + "args": { + "External id": 508208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274145099920.348, "dur": 45.414, + "args": { + "External id": 508209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274145099922.131, "dur": 43.208, + "args": { + "External id": 508210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100157.039, "dur": 19.052, + "args": { + "External id": 508211,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100195.582, "dur": 16.865, + "args": { + "External id": 508212,"Record function id": 0, "Sequence number": 5516445, "Fwd thread id": 1, "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100197.432, "dur": 12.846, + "args": { + "External id": 508213,"Sequence number": 5516445, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5940 + } + }, + { + "ph": "f", "id": 242, "pid": 2338711, "tid": 2379440, "ts": 6274145100197.432, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145100202.785, "dur": 7.224, + "args": { + "External id": 508214,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145100205.006, "dur": 4.824, + "args": { + "External id": 508215,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100216.393, "dur": 79.338, + "args": { + "External id": 508216,"Record function id": 0, "Sequence number": 5516444, "Fwd thread id": 1, "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100217.381, "dur": 73.634, + "args": { + "External id": 508217,"Sequence number": 5516444, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5944 + } + }, + { + "ph": "f", "id": 243, "pid": 2338711, "tid": 2379440, "ts": 6274145100217.381, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145100220.258, "dur": 70.430, + "args": { + "External id": 508218,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145100226.678, "dur": 24.819, + "args": { + "External id": 508219,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145100229.796, "dur": 4.560, + "args": { + "External id": 508220,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100235.667, "dur": 15.547, + "args": { + "External id": 508221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100237.876, "dur": 12.852, + "args": { + "External id": 508222,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145100253.161, "dur": 7.950, + "args": { + "External id": 508223,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145100256.797, "dur": 3.894, + "args": { + "External id": 508224,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100262.598, "dur": 27.127, + "args": { + "External id": 508225,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100300.342, "dur": 53.813, + "args": { + "External id": 508226,"Record function id": 0, "Sequence number": 5516443, "Fwd thread id": 1, "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100301.235, "dur": 50.193, + "args": { + "External id": 508227,"Sequence number": 5516443, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5954 + } + }, + { + "ph": "f", "id": 244, "pid": 2338711, "tid": 2379440, "ts": 6274145100301.235, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145100303.831, "dur": 47.281, + "args": { + "External id": 508228,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145100306.069, "dur": 18.590, + "args": { + "External id": 508229,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145100309.650, "dur": 3.049, + "args": { + "External id": 508230,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100313.409, "dur": 10.994, + "args": { + "External id": 508231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100314.444, "dur": 9.618, + "args": { + "External id": 508232,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145100326.038, "dur": 6.143, + "args": { + "External id": 508233,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145100330.727, "dur": 0.867, + "args": { + "External id": 508234,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100332.940, "dur": 17.461, + "args": { + "External id": 508235,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100358.082, "dur": 133.099, + "args": { + "External id": 508236,"Record function id": 0, "Sequence number": 5516442, "Fwd thread id": 1, "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100359.211, "dur": 128.161, + "args": { + "External id": 508237,"Sequence number": 5516442, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5964 + } + }, + { + "ph": "f", "id": 245, "pid": 2338711, "tid": 2379440, "ts": 6274145100359.211, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145100361.118, "dur": 125.751, + "args": { + "External id": 508238,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145100365.681, "dur": 17.936, + "args": { + "External id": 508239,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145100366.742, "dur": 2.319, + "args": { + "External id": 508240,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100369.848, "dur": 13.501, + "args": { + "External id": 508241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100370.978, "dur": 12.024, + "args": { + "External id": 508242,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145100384.776, "dur": 3.238, + "args": { + "External id": 508243,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145100387.251, "dur": 0.502, + "args": { + "External id": 508244,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100388.845, "dur": 96.877, + "args": { + "External id": 508245,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100496.056, "dur": 128.475, + "args": { + "External id": 508246,"Record function id": 0, "Sequence number": 5516441, "Fwd thread id": 1, "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100497.343, "dur": 84.577, + "args": { + "External id": 508247,"Sequence number": 5516441, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5974 + } + }, + { + "ph": "f", "id": 246, "pid": 2338711, "tid": 2379440, "ts": 6274145100497.343, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145100498.918, "dur": 82.567, + "args": { + "External id": 508248,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145100500.143, "dur": 22.083, + "args": { + "External id": 508249,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145100503.859, "dur": 2.130, + "args": { + "External id": 508250,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100506.867, "dur": 15.116, + "args": { + "External id": 508251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100507.899, "dur": 13.719, + "args": { + "External id": 508252,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145100523.198, "dur": 2.998, + "args": { + "External id": 508253,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145100525.382, "dur": 0.684, + "args": { + "External id": 508254,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100527.305, "dur": 53.425, + "args": { + "External id": 508255,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100600.914, "dur": 20.555, + "args": { + "External id": 508256,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100634.014, "dur": 54.573, + "args": { + "External id": 508257,"Record function id": 0, "Sequence number": 5516440, "Fwd thread id": 1, "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145100636.237, "dur": 1.492, + "args": { + "External id": 508258,"Sequence number": 5516440, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5985 + } + }, + { + "ph": "f", "id": 247, "pid": 2338711, "tid": 2379440, "ts": 6274145100636.237, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145100640.892, "dur": 41.000, + "args": { + "External id": 508259,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145100644.346, "dur": 37.028, + "args": { + "External id": 508260,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145100652.390, "dur": 3.492, + "args": { + "External id": 508261,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145100693.437, "dur": 3565.387, + "args": { + "External id": 508262,"Record function id": 0, "Sequence number": 5516439, "Fwd thread id": 1, "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145100706.780, "dur": 3509.010, + "args": { + "External id": 508263,"Sequence number": 5516439, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5990 + } + }, + { + "ph": "f", "id": 248, "pid": 2338711, "tid": 2379440, "ts": 6274145100706.780, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145100744.078, "dur": 3.918, + "args": { + "External id": 508264,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145100751.078, "dur": 3213.584, + "args": { + "External id": 508265,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145100752.923, "dur": 3211.343, + "args": { + "External id": 508266,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145100756.270, "dur": 4.021, + "args": { + "External id": 508267,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145100761.677, "dur": 3200.897, + "args": { + "External id": 508268,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338711, "tid": 2379440, + "ts": 6274145103969.678, "dur": 0.599, + "args": { + "External id": 508269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145103971.917, "dur": 2.976, + "args": { + "External id": 508270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338711, "tid": 2379440, + "ts": 6274145103973.835, "dur": 0.881, + "args": { + "External id": 508271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2379440, + "ts": 6274145103980.382, "dur": 25.058, + "args": { + "External id": 508272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2379440, + "ts": 6274145104010.928, "dur": 67.224, + "args": { + "External id": 508273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2379440, + "ts": 6274145104012.641, "dur": 65.283, + "args": { + "External id": 508274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2379440, + "ts": 6274145104014.416, "dur": 62.702, + "args": { + "External id": 508275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104234.299, "dur": 19.389, + "args": { + "External id": 508276,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145104275.934, "dur": 14.414, + "args": { + "External id": 508277,"Record function id": 0, "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145104279.127, "dur": 9.458, + "args": { + "External id": 508278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145104282.509, "dur": 5.197, + "args": { + "External id": 508279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145104283.752, "dur": 3.827, + "args": { + "External id": 508280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104294.613, "dur": 14.425, + "args": { + "External id": 508281,"Record function id": 0, "Sequence number": 5516438, "Fwd thread id": 1, "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104295.838, "dur": 10.921, + "args": { + "External id": 508282,"Sequence number": 5516438, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6009 + } + }, + { + "ph": "f", "id": 249, "pid": 2338711, "tid": 2379440, "ts": 6274145104295.838, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145104300.593, "dur": 5.878, + "args": { + "External id": 508283,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145104302.419, "dur": 3.845, + "args": { + "External id": 508284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104312.606, "dur": 77.790, + "args": { + "External id": 508285,"Record function id": 0, "Sequence number": 5516437, "Fwd thread id": 1, "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104313.846, "dur": 71.745, + "args": { + "External id": 508286,"Sequence number": 5516437, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6013 + } + }, + { + "ph": "f", "id": 250, "pid": 2338711, "tid": 2379440, "ts": 6274145104313.846, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145104316.726, "dur": 68.403, + "args": { + "External id": 508287,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145104323.233, "dur": 24.543, + "args": { + "External id": 508288,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145104325.813, "dur": 4.059, + "args": { + "External id": 508289,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104331.192, "dur": 16.292, + "args": { + "External id": 508290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104333.023, "dur": 13.977, + "args": { + "External id": 508291,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145104349.847, "dur": 5.147, + "args": { + "External id": 508292,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145104353.171, "dur": 1.492, + "args": { + "External id": 508293,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104356.537, "dur": 27.752, + "args": { + "External id": 508294,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104394.934, "dur": 61.253, + "args": { + "External id": 508295,"Record function id": 0, "Sequence number": 5516436, "Fwd thread id": 1, "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104396.027, "dur": 56.560, + "args": { + "External id": 508296,"Sequence number": 5516436, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6023 + } + }, + { + "ph": "f", "id": 251, "pid": 2338711, "tid": 2379440, "ts": 6274145104396.027, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145104398.353, "dur": 53.981, + "args": { + "External id": 508297,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145104400.501, "dur": 26.772, + "args": { + "External id": 508298,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145104403.862, "dur": 4.909, + "args": { + "External id": 508299,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104409.646, "dur": 17.381, + "args": { + "External id": 508300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104410.580, "dur": 16.022, + "args": { + "External id": 508301,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145104428.606, "dur": 6.076, + "args": { + "External id": 508302,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145104433.182, "dur": 0.871, + "args": { + "External id": 508303,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104435.536, "dur": 16.257, + "args": { + "External id": 508304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104460.134, "dur": 153.901, + "args": { + "External id": 508305,"Record function id": 0, "Sequence number": 5516435, "Fwd thread id": 1, "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104461.062, "dur": 147.358, + "args": { + "External id": 508306,"Sequence number": 5516435, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6033 + } + }, + { + "ph": "f", "id": 252, "pid": 2338711, "tid": 2379440, "ts": 6274145104461.062, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145104462.840, "dur": 144.930, + "args": { + "External id": 508307,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145104464.474, "dur": 23.415, + "args": { + "External id": 508308,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145104467.566, "dur": 2.361, + "args": { + "External id": 508309,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104473.465, "dur": 14.149, + "args": { + "External id": 508310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104476.145, "dur": 11.132, + "args": { + "External id": 508311,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145104489.097, "dur": 2.535, + "args": { + "External id": 508312,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145104490.964, "dur": 0.492, + "args": { + "External id": 508313,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104492.804, "dur": 112.995, + "args": { + "External id": 508314,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104623.038, "dur": 127.783, + "args": { + "External id": 508315,"Record function id": 0, "Sequence number": 5516434, "Fwd thread id": 1, "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104624.707, "dur": 105.893, + "args": { + "External id": 508316,"Sequence number": 5516434, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6043 + } + }, + { + "ph": "f", "id": 253, "pid": 2338711, "tid": 2379440, "ts": 6274145104624.707, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145104627.449, "dur": 102.825, + "args": { + "External id": 508317,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2379440, + "ts": 6274145104630.002, "dur": 28.969, + "args": { + "External id": 508318,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145104634.046, "dur": 3.547, + "args": { + "External id": 508319,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104638.906, "dur": 19.632, + "args": { + "External id": 508320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104640.990, "dur": 16.928, + "args": { + "External id": 508321,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145104660.505, "dur": 4.489, + "args": { + "External id": 508322,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145104663.618, "dur": 1.024, + "args": { + "External id": 508323,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104666.307, "dur": 62.967, + "args": { + "External id": 508324,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145104735.713, "dur": 13.023, + "args": { + "External id": 508325,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145104757.479, "dur": 390.145, + "args": { + "External id": 508326,"Record function id": 0, "Sequence number": 5516433, "Fwd thread id": 1, "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145104759.396, "dur": 377.121, + "args": { + "External id": 508327,"Sequence number": 5516433, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6054 + } + }, + { + "ph": "f", "id": 254, "pid": 2338711, "tid": 2379440, "ts": 6274145104759.396, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145104928.863, "dur": 45.788, + "args": { + "External id": 508328,"kernel_hash": "cukbbbqr5h34iuuctj6lctslc3gc7fmn2y2xm57dvknbmyb4o2vg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukbbbqr5h34iuuctj6lctslc3gc7fmn2y2xm57dvknbmyb4o2vg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105007.294, "dur": 24.791, + "args": { + "External id": 508329,"kernel_hash": "cjqdkew4nkqxnqrjfb6bdwoluwj7gci6uynymszp5r5s3yejimwu", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/jq/cjqdkew4nkqxnqrjfb6bdwoluwj7gci6uynymszp5r5s3yejimwu.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145105051.765, "dur": 61.295, + "args": { + "External id": 508330,"kernel_hash": "ccvsds3iqc22g7vjoxnqkxggpevktgq2apwlte6cs2nh2wkycybr", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/cv/ccvsds3iqc22g7vjoxnqkxggpevktgq2apwlte6cs2nh2wkycybr.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145105160.360, "dur": 13.480, + "args": { + "External id": 508331,"Record function id": 0, "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145105163.136, "dur": 9.747, + "args": { + "External id": 508332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145105166.861, "dur": 4.990, + "args": { + "External id": 508333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145105168.280, "dur": 3.433, + "args": { + "External id": 508334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105178.078, "dur": 40.076, + "args": { + "External id": 508335,"Record function id": 0, "Sequence number": 5516432, "Fwd thread id": 1, "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105179.235, "dur": 31.557, + "args": { + "External id": 508336,"Sequence number": 5516432, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6063 + } + }, + { + "ph": "f", "id": 255, "pid": 2338711, "tid": 2379440, "ts": 6274145105179.235, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145105181.829, "dur": 11.081, + "args": { + "External id": 508337,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105188.920, "dur": 1.775, + "args": { + "External id": 508338,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145105193.737, "dur": 4.082, + "args": { + "External id": 508339,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105196.276, "dur": 0.504, + "args": { + "External id": 508340,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145105198.835, "dur": 4.126, + "args": { + "External id": 508341,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105201.201, "dur": 0.750, + "args": { + "External id": 508342,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2379440, + "ts": 6274145105203.660, "dur": 6.394, + "args": { + "External id": 508343,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105205.967, "dur": 3.157, + "args": { + "External id": 508344,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105222.319, "dur": 5.982, + "args": { + "External id": 508345,"Record function id": 0, "Sequence number": 5516431, "Fwd thread id": 1, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105223.323, "dur": 1.470, + "args": { + "External id": 508346,"Sequence number": 5516431, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6073 + } + }, + { + "ph": "f", "id": 256, "pid": 2338711, "tid": 2379440, "ts": 6274145105223.323, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145105232.624, "dur": 563.275, + "args": { + "External id": 508347,"Record function id": 0, "Sequence number": 5516430, "Fwd thread id": 1, "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145105234.367, "dur": 548.001, + "args": { + "External id": 508348,"Sequence number": 5516430, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6075 + } + }, + { + "ph": "f", "id": 257, "pid": 2338711, "tid": 2379440, "ts": 6274145105234.367, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145105273.113, "dur": 11.298, + "args": { + "External id": 508349,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145105279.773, "dur": 4.319, + "args": { + "External id": 508350,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145105288.416, "dur": 9.298, + "args": { + "External id": 508351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145105293.688, "dur": 2.841, + "args": { + "External id": 508352,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105295.662, "dur": 0.630, + "args": { + "External id": 508353,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274145105302.092, "dur": 131.109, + "args": { + "External id": 508354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145105303.414, "dur": 3.575, + "args": { + "External id": 508355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145105304.272, "dur": 1.705, + "args": { + "External id": 508356,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105305.487, "dur": 0.375, + "args": { + "External id": 508357,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274145105313.890, "dur": 118.536, + "args": { + "External id": 508358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145105315.672, "dur": 115.847, + "args": { + "External id": 508359,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145105438.282, "dur": 5.166, + "args": { + "External id": 508360,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145105440.217, "dur": 3.080, + "args": { + "External id": 508361,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145105476.439, "dur": 5.678, + "args": { + "External id": 508362,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145105483.680, "dur": 3.455, + "args": { + "External id": 508363,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145105488.422, "dur": 4.711, + "args": { + "External id": 508364,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145105530.341, "dur": 2.712, + "args": { + "External id": 508365,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145105531.371, "dur": 1.465, + "args": { + "External id": 508366,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274145105559.819, "dur": 196.186, + "args": { + "External id": 508367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145105567.630, "dur": 6.153, + "args": { + "External id": 508368,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105571.850, "dur": 0.786, + "args": { + "External id": 508369,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145105576.403, "dur": 7.386, + "args": { + "External id": 508370,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105581.828, "dur": 0.672, + "args": { + "External id": 508371,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145105585.465, "dur": 26.564, + "args": { + "External id": 508372,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105609.801, "dur": 0.855, + "args": { + "External id": 508373,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145105613.995, "dur": 5.880, + "args": { + "External id": 508374,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105617.672, "dur": 1.044, + "args": { + "External id": 508375,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145105628.198, "dur": 4.595, + "args": { + "External id": 508376,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105631.297, "dur": 0.906, + "args": { + "External id": 508377,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145105634.829, "dur": 9.471, + "args": { + "External id": 508378,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145105641.130, "dur": 2.797, + "args": { + "External id": 508379,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145105645.819, "dur": 6.212, + "args": { + "External id": 508380,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105648.450, "dur": 3.013, + "args": { + "External id": 508381,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145105653.681, "dur": 3.901, + "args": { + "External id": 508382,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145105655.084, "dur": 2.313, + "args": { + "External id": 508383,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145105660.128, "dur": 78.936, + "args": { + "External id": 508384,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145105743.892, "dur": 1.324, + "args": { + "External id": 508385,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145105746.601, "dur": 4.095, + "args": { + "External id": 508386,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105749.036, "dur": 0.732, + "args": { + "External id": 508387,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145105753.384, "dur": 0.913, + "args": { + "External id": 508388,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145105811.040, "dur": 10.875, + "args": { + "External id": 508389,"Record function id": 0, "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145105813.450, "dur": 7.515, + "args": { + "External id": 508390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145105816.217, "dur": 3.681, + "args": { + "External id": 508391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145105817.463, "dur": 2.314, + "args": { + "External id": 508392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105826.423, "dur": 10.899, + "args": { + "External id": 508393,"Record function id": 0, "Sequence number": 5516429, "Fwd thread id": 1, "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105827.638, "dur": 6.930, + "args": { + "External id": 508394,"Sequence number": 5516429, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6121 + } + }, + { + "ph": "f", "id": 258, "pid": 2338711, "tid": 2379440, "ts": 6274145105827.638, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145105829.504, "dur": 4.819, + "args": { + "External id": 508395,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145105833.106, "dur": 1.078, + "args": { + "External id": 508396,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105841.415, "dur": 133.870, + "args": { + "External id": 508397,"Record function id": 0, "Sequence number": 5516428, "Fwd thread id": 1, "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105842.418, "dur": 126.019, + "args": { + "External id": 508398,"Sequence number": 5516428, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6125 + } + }, + { + "ph": "f", "id": 259, "pid": 2338711, "tid": 2379440, "ts": 6274145105842.418, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145105846.979, "dur": 4.836, + "args": { + "External id": 508399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145105848.966, "dur": 2.188, + "args": { + "External id": 508400,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105850.501, "dur": 0.505, + "args": { + "External id": 508401,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145105853.403, "dur": 48.137, + "args": { + "External id": 508402,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145105905.389, "dur": 4.629, + "args": { + "External id": 508403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145105906.446, "dur": 2.977, + "args": { + "External id": 508404,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105908.059, "dur": 1.142, + "args": { + "External id": 508405,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145105911.689, "dur": 3.049, + "args": { + "External id": 508406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145105912.651, "dur": 1.624, + "args": { + "External id": 508407,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145105913.804, "dur": 0.405, + "args": { + "External id": 508408,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145105917.566, "dur": 49.953, + "args": { + "External id": 508409,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105980.309, "dur": 7.892, + "args": { + "External id": 508410,"Record function id": 0, "Sequence number": 5516427, "Fwd thread id": 1, "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105981.482, "dur": 4.893, + "args": { + "External id": 508411,"Sequence number": 5516427, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6138 + } + }, + { + "ph": "f", "id": 260, "pid": 2338711, "tid": 2379440, "ts": 6274145105981.482, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145105983.710, "dur": 2.529, + "args": { + "External id": 508412,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145105984.610, "dur": 1.498, + "args": { + "External id": 508413,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105991.694, "dur": 50.079, + "args": { + "External id": 508414,"Record function id": 0, "Sequence number": 5516426, "Fwd thread id": 1, "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145105992.791, "dur": 46.595, + "args": { + "External id": 508415,"Sequence number": 5516426, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6142 + } + }, + { + "ph": "f", "id": 261, "pid": 2338711, "tid": 2379440, "ts": 6274145105992.791, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145105993.502, "dur": 45.656, + "args": { + "External id": 508416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145106026.768, "dur": 11.756, + "args": { + "External id": 513025,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106037.747, "dur": 0.660, + "args": { + "External id": 513026,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145106046.041, "dur": 60.247, + "args": { + "External id": 513027,"Record function id": 0, "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145106047.204, "dur": 57.640, + "args": { + "External id": 513028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145106048.345, "dur": 55.938, + "args": { + "External id": 513029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145106051.246, "dur": 52.545, + "args": { + "External id": 513030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106112.477, "dur": 8.432, + "args": { + "External id": 513031,"Record function id": 0, "Sequence number": 5516425, "Fwd thread id": 1, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106114.106, "dur": 4.055, + "args": { + "External id": 513032,"Sequence number": 5516425, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6151 + } + }, + { + "ph": "f", "id": 262, "pid": 2338711, "tid": 2379440, "ts": 6274145106114.106, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145106115.715, "dur": 2.292, + "args": { + "External id": 513033,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106116.640, "dur": 1.222, + "args": { + "External id": 513034,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106124.254, "dur": 137.681, + "args": { + "External id": 513035,"Record function id": 0, "Sequence number": 5516424, "Fwd thread id": 1, "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106125.074, "dur": 127.619, + "args": { + "External id": 513036,"Sequence number": 5516424, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6155 + } + }, + { + "ph": "f", "id": 263, "pid": 2338711, "tid": 2379440, "ts": 6274145106125.074, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145106127.808, "dur": 5.372, + "args": { + "External id": 513037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145106130.515, "dur": 2.172, + "args": { + "External id": 513038,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106131.865, "dur": 0.694, + "args": { + "External id": 513039,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145106134.265, "dur": 57.490, + "args": { + "External id": 513040,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145106192.985, "dur": 3.550, + "args": { + "External id": 513041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145106194.035, "dur": 1.976, + "args": { + "External id": 513042,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106195.222, "dur": 0.679, + "args": { + "External id": 513043,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145106199.859, "dur": 6.092, + "args": { + "External id": 513044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145106200.931, "dur": 4.534, + "args": { + "External id": 513045,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106202.797, "dur": 2.603, + "args": { + "External id": 513046,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145106206.661, "dur": 45.282, + "args": { + "External id": 513047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106267.328, "dur": 37.741, + "args": { + "External id": 513048,"Record function id": 0, "Sequence number": 5516423, "Fwd thread id": 1, "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106268.382, "dur": 4.262, + "args": { + "External id": 513049,"Sequence number": 5516423, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6168 + } + }, + { + "ph": "f", "id": 264, "pid": 2338711, "tid": 2379440, "ts": 6274145106268.382, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145106270.204, "dur": 2.296, + "args": { + "External id": 513050,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106271.237, "dur": 1.158, + "args": { + "External id": 513051,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145106276.781, "dur": 25.551, + "args": { + "External id": 513052,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106308.817, "dur": 11.654, + "args": { + "External id": 513053,"Record function id": 0, "Sequence number": 5516422, "Fwd thread id": 1, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106312.547, "dur": 5.717, + "args": { + "External id": 513054,"Sequence number": 5516422, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6173 + } + }, + { + "ph": "f", "id": 265, "pid": 2338711, "tid": 2379440, "ts": 6274145106312.547, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145106313.393, "dur": 4.653, + "args": { + "External id": 513055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145106314.469, "dur": 3.099, + "args": { + "External id": 513056,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106316.368, "dur": 1.079, + "args": { + "External id": 513057,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145106324.902, "dur": 6.056, + "args": { + "External id": 513058,"Record function id": 0, "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145106326.943, "dur": 3.466, + "args": { + "External id": 513059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145106328.217, "dur": 1.923, + "args": { + "External id": 513060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145106328.832, "dur": 1.180, + "args": { + "External id": 513061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145106335.951, "dur": 522.447, + "args": { + "External id": 513062,"Record function id": 0, "Sequence number": 5516421, "Fwd thread id": 1, "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145106337.790, "dur": 505.442, + "args": { + "External id": 513063,"Sequence number": 5516421, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6182 + } + }, + { + "ph": "f", "id": 266, "pid": 2338711, "tid": 2379440, "ts": 6274145106337.790, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274145106369.696, "dur": 39.534, + "args": { + "External id": 513064,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145106371.508, "dur": 37.536, + "args": { + "External id": 513065,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145106375.180, "dur": 7.156, + "args": { + "External id": 513066,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145106378.407, "dur": 3.364, + "args": { + "External id": 513067,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145106384.007, "dur": 24.566, + "args": { + "External id": 513068,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145106425.771, "dur": 2.444, + "args": { + "External id": 513069,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106426.891, "dur": 1.213, + "args": { + "External id": 513070,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145106432.476, "dur": 1.660, + "args": { + "External id": 513071,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106433.312, "dur": 0.724, + "args": { + "External id": 513072,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145106449.246, "dur": 4.750, + "args": { + "External id": 513073,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145106468.280, "dur": 2.736, + "args": { + "External id": 513074,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106708.404, "dur": 4.228, + "args": { + "External id": 513075,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145106718.665, "dur": 44.569, + "args": { + "External id": 513076,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106735.359, "dur": 1.178, + "args": { + "External id": 513077,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145106770.653, "dur": 35.848, + "args": { + "External id": 513078,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145106772.859, "dur": 33.430, + "args": { + "External id": 513079,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106779.912, "dur": 4.806, + "args": { + "External id": 513080,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145106786.505, "dur": 19.349, + "args": { + "External id": 513081,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145106811.631, "dur": 3.145, + "args": { + "External id": 513082,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106813.480, "dur": 1.156, + "args": { + "External id": 513083,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145106822.484, "dur": 2.638, + "args": { + "External id": 513084,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106823.881, "dur": 1.128, + "args": { + "External id": 513085,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145106827.580, "dur": 4.781, + "args": { + "External id": 513086,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106831.355, "dur": 0.885, + "args": { + "External id": 513087,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145106874.378, "dur": 11.299, + "args": { + "External id": 513088,"Record function id": 0, "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145106877.539, "dur": 7.267, + "args": { + "External id": 513089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145106880.558, "dur": 3.311, + "args": { + "External id": 513090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145106881.938, "dur": 1.813, + "args": { + "External id": 513091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106889.633, "dur": 10.733, + "args": { + "External id": 513092,"Record function id": 0, "Sequence number": 5516420, "Fwd thread id": 1, "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106891.014, "dur": 6.774, + "args": { + "External id": 513093,"Sequence number": 5516420, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6212 + } + }, + { + "ph": "f", "id": 267, "pid": 2338711, "tid": 2379440, "ts": 6274145106891.014, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145106892.833, "dur": 4.747, + "args": { + "External id": 513094,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145106896.016, "dur": 1.401, + "args": { + "External id": 513095,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106904.052, "dur": 202.203, + "args": { + "External id": 513096,"Record function id": 0, "Sequence number": 5516419, "Fwd thread id": 1, "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145106905.030, "dur": 195.251, + "args": { + "External id": 513097,"Sequence number": 5516419, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6216 + } + }, + { + "ph": "f", "id": 268, "pid": 2338711, "tid": 2379440, "ts": 6274145106905.030, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145106908.381, "dur": 5.933, + "args": { + "External id": 513098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145106910.423, "dur": 3.214, + "args": { + "External id": 513099,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145106912.614, "dur": 0.848, + "args": { + "External id": 513100,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145106915.854, "dur": 87.298, + "args": { + "External id": 513101,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145107004.623, "dur": 6.357, + "args": { + "External id": 513102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145107008.065, "dur": 2.182, + "args": { + "External id": 513103,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107009.505, "dur": 0.620, + "args": { + "External id": 513104,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145107012.595, "dur": 5.735, + "args": { + "External id": 513105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145107013.595, "dur": 4.257, + "args": { + "External id": 513106,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107015.014, "dur": 2.754, + "args": { + "External id": 513107,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145107019.135, "dur": 79.507, + "args": { + "External id": 513108,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145107114.545, "dur": 12.813, + "args": { + "External id": 513109,"Record function id": 0, "Sequence number": 5516418, "Fwd thread id": 1, "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145107115.826, "dur": 9.378, + "args": { + "External id": 513110,"Sequence number": 5516418, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6229 + } + }, + { + "ph": "f", "id": 269, "pid": 2338711, "tid": 2379440, "ts": 6274145107115.826, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145107121.202, "dur": 3.830, + "args": { + "External id": 513111,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145107122.775, "dur": 2.155, + "args": { + "External id": 513112,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145107131.168, "dur": 8.138, + "args": { + "External id": 513113,"Record function id": 0, "Sequence number": 5516417, "Fwd thread id": 1, "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145107132.015, "dur": 5.152, + "args": { + "External id": 513114,"Sequence number": 5516417, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6233 + } + }, + { + "ph": "f", "id": 270, "pid": 2338711, "tid": 2379440, "ts": 6274145107132.015, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145107133.049, "dur": 3.900, + "args": { + "External id": 513115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145107133.794, "dur": 2.666, + "args": { + "External id": 513116,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107135.672, "dur": 0.664, + "args": { + "External id": 513117,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145107143.991, "dur": 8.803, + "args": { + "External id": 513118,"Record function id": 0, "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145107145.242, "dur": 6.979, + "args": { + "External id": 513119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145107146.958, "dur": 4.989, + "args": { + "External id": 513120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145107150.077, "dur": 1.749, + "args": { + "External id": 513121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145107156.189, "dur": 7.800, + "args": { + "External id": 513122,"Record function id": 0, "Sequence number": 5516416, "Fwd thread id": 1, "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145107157.309, "dur": 4.005, + "args": { + "External id": 513123,"Sequence number": 5516416, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6242 + } + }, + { + "ph": "f", "id": 271, "pid": 2338711, "tid": 2379440, "ts": 6274145107157.309, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145107158.350, "dur": 2.804, + "args": { + "External id": 513124,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145107159.727, "dur": 1.277, + "args": { + "External id": 513125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145107169.673, "dur": 405.135, + "args": { + "External id": 513126,"Record function id": 0, "Sequence number": 5516415, "Fwd thread id": 1, "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145107171.149, "dur": 384.948, + "args": { + "External id": 513127,"Sequence number": 5516415, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6246 + } + }, + { + "ph": "f", "id": 272, "pid": 2338711, "tid": 2379440, "ts": 6274145107171.149, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145107189.922, "dur": 9.330, + "args": { + "External id": 513128,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107193.371, "dur": 5.402, + "args": { + "External id": 513129,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145107201.683, "dur": 3.713, + "args": { + "External id": 513130,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107203.489, "dur": 1.708, + "args": { + "External id": 513131,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145107207.029, "dur": 6.175, + "args": { + "External id": 513132,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107208.638, "dur": 4.398, + "args": { + "External id": 513133,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145107252.987, "dur": 275.384, + "args": { + "External id": 513134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145107343.995, "dur": 6.595, + "args": { + "External id": 513135,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145107352.617, "dur": 3.747, + "args": { + "External id": 513136,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145107357.785, "dur": 2.480, + "args": { + "External id": 513137,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145107361.463, "dur": 2.562, + "args": { + "External id": 513138,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145107427.170, "dur": 2.901, + "args": { + "External id": 513139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145107428.482, "dur": 1.479, + "args": { + "External id": 513140,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145107432.141, "dur": 28.641, + "args": { + "External id": 513141,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107438.436, "dur": 0.784, + "args": { + "External id": 513142,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145107464.357, "dur": 1.910, + "args": { + "External id": 513143,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145107465.693, "dur": 0.494, + "args": { + "External id": 513144,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145107467.285, "dur": 15.675, + "args": { + "External id": 513145,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107471.405, "dur": 0.412, + "args": { + "External id": 513146,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145107543.189, "dur": 3.622, + "args": { + "External id": 513147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145107550.236, "dur": 0.585, + "args": { + "External id": 513148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145107552.746, "dur": 0.435, + "args": { + "External id": 513149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145107583.252, "dur": 291.061, + "args": { + "External id": 513150,"Record function id": 0, "Sequence number": 5516414, "Fwd thread id": 1, "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145107584.914, "dur": 280.606, + "args": { + "External id": 513151,"Sequence number": 5516414, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6270 + } + }, + { + "ph": "f", "id": 273, "pid": 2338711, "tid": 2379440, "ts": 6274145107584.914, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145107633.598, "dur": 62.253, + "args": { + "External id": 513152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107638.683, "dur": 5.891, + "args": { + "External id": 513153,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145107647.009, "dur": 48.139, + "args": { + "External id": 513154,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145107707.626, "dur": 5.943, + "args": { + "External id": 513155,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107710.933, "dur": 2.318, + "args": { + "External id": 513156,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145107885.276, "dur": 226.046, + "args": { + "External id": 513157,"Record function id": 0, "Sequence number": 5516413, "Fwd thread id": 1, "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145107887.518, "dur": 214.363, + "args": { + "External id": 513158,"Sequence number": 5516413, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6277 + } + }, + { + "ph": "f", "id": 274, "pid": 2338711, "tid": 2379440, "ts": 6274145107887.518, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145107902.209, "dur": 49.547, + "args": { + "External id": 513159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107905.037, "dur": 3.081, + "args": { + "External id": 513160,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145107909.097, "dur": 42.181, + "args": { + "External id": 513161,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145107959.280, "dur": 5.515, + "args": { + "External id": 513162,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145107961.658, "dur": 2.871, + "args": { + "External id": 513163,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108121.761, "dur": 16.042, + "args": { + "External id": 513164,"Record function id": 0, "Sequence number": 5516412, "Fwd thread id": 1, "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108123.977, "dur": 10.946, + "args": { + "External id": 513165,"Sequence number": 5516412, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6284 + } + }, + { + "ph": "f", "id": 275, "pid": 2338711, "tid": 2379440, "ts": 6274145108123.977, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108127.470, "dur": 7.236, + "args": { + "External id": 513166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108129.182, "dur": 5.308, + "args": { + "External id": 513167,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108141.800, "dur": 9.272, + "args": { + "External id": 513168,"Record function id": 0, "Sequence number": 5516411, "Fwd thread id": 1, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108142.677, "dur": 6.747, + "args": { + "External id": 513169,"Sequence number": 5516411, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6288 + } + }, + { + "ph": "f", "id": 276, "pid": 2338711, "tid": 2379440, "ts": 6274145108142.677, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108144.275, "dur": 5.002, + "args": { + "External id": 513170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108148.152, "dur": 1.017, + "args": { + "External id": 513171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108154.681, "dur": 11.306, + "args": { + "External id": 513172,"Record function id": 0, "Sequence number": 5516410, "Fwd thread id": 1, "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108160.314, "dur": 3.745, + "args": { + "External id": 513173,"Sequence number": 5516410, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6292 + } + }, + { + "ph": "f", "id": 277, "pid": 2338711, "tid": 2379440, "ts": 6274145108160.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108161.794, "dur": 2.129, + "args": { + "External id": 513174,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108162.767, "dur": 1.031, + "args": { + "External id": 513175,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108169.532, "dur": 11.014, + "args": { + "External id": 513176,"Record function id": 0, "Sequence number": 5516409, "Fwd thread id": 1, "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108170.870, "dur": 7.858, + "args": { + "External id": 513177,"Sequence number": 5516409, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6296 + } + }, + { + "ph": "f", "id": 278, "pid": 2338711, "tid": 2379440, "ts": 6274145108170.870, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108172.111, "dur": 6.482, + "args": { + "External id": 513178,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108175.207, "dur": 3.303, + "args": { + "External id": 513179,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108184.457, "dur": 171.492, + "args": { + "External id": 513180,"Record function id": 0, "Sequence number": 5516408, "Fwd thread id": 1, "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108185.510, "dur": 164.158, + "args": { + "External id": 513181,"Sequence number": 5516408, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6300 + } + }, + { + "ph": "f", "id": 279, "pid": 2338711, "tid": 2379440, "ts": 6274145108185.510, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108189.964, "dur": 8.262, + "args": { + "External id": 513182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108192.362, "dur": 5.142, + "args": { + "External id": 513183,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108195.137, "dur": 2.028, + "args": { + "External id": 513184,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145108200.126, "dur": 83.747, + "args": { + "External id": 513185,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108287.751, "dur": 4.669, + "args": { + "External id": 513186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108288.691, "dur": 2.792, + "args": { + "External id": 513187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108290.451, "dur": 0.888, + "args": { + "External id": 513188,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108294.222, "dur": 3.005, + "args": { + "External id": 513189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108295.256, "dur": 1.538, + "args": { + "External id": 513190,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108296.268, "dur": 0.415, + "args": { + "External id": 513191,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145108300.793, "dur": 48.155, + "args": { + "External id": 513192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108361.399, "dur": 7.391, + "args": { + "External id": 513193,"Record function id": 0, "Sequence number": 5516407, "Fwd thread id": 1, "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108362.315, "dur": 4.597, + "args": { + "External id": 513194,"Sequence number": 5516407, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6313 + } + }, + { + "ph": "f", "id": 280, "pid": 2338711, "tid": 2379440, "ts": 6274145108362.315, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108364.133, "dur": 2.626, + "args": { + "External id": 513195,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108365.291, "dur": 1.357, + "args": { + "External id": 513196,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108372.066, "dur": 10.343, + "args": { + "External id": 513197,"Record function id": 0, "Sequence number": 5516406, "Fwd thread id": 1, "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108373.011, "dur": 7.734, + "args": { + "External id": 513198,"Sequence number": 5516406, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6317 + } + }, + { + "ph": "f", "id": 281, "pid": 2338711, "tid": 2379440, "ts": 6274145108373.011, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108374.129, "dur": 6.413, + "args": { + "External id": 513199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108374.832, "dur": 5.181, + "args": { + "External id": 513200,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108379.278, "dur": 0.646, + "args": { + "External id": 513201,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145108388.748, "dur": 12.598, + "args": { + "External id": 513202,"Record function id": 0, "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145108390.618, "dur": 9.892, + "args": { + "External id": 513203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145108393.346, "dur": 6.705, + "args": { + "External id": 513204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145108394.751, "dur": 5.191, + "args": { + "External id": 513205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108404.846, "dur": 6.421, + "args": { + "External id": 513206,"Record function id": 0, "Sequence number": 5516405, "Fwd thread id": 1, "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108405.994, "dur": 3.674, + "args": { + "External id": 513207,"Sequence number": 5516405, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6326 + } + }, + { + "ph": "f", "id": 282, "pid": 2338711, "tid": 2379440, "ts": 6274145108405.994, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108407.414, "dur": 2.107, + "args": { + "External id": 513208,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108408.595, "dur": 0.798, + "args": { + "External id": 513209,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108414.832, "dur": 95.497, + "args": { + "External id": 513210,"Record function id": 0, "Sequence number": 5516404, "Fwd thread id": 1, "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108417.785, "dur": 86.975, + "args": { + "External id": 513211,"Sequence number": 5516404, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6330 + } + }, + { + "ph": "f", "id": 283, "pid": 2338711, "tid": 2379440, "ts": 6274145108417.785, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108419.763, "dur": 2.441, + "args": { + "External id": 513212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108420.286, "dur": 1.487, + "args": { + "External id": 513213,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108421.311, "dur": 0.368, + "args": { + "External id": 513214,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145108422.854, "dur": 26.541, + "args": { + "External id": 513215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108450.542, "dur": 9.480, + "args": { + "External id": 513216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108451.223, "dur": 8.172, + "args": { + "External id": 513217,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108457.162, "dur": 2.034, + "args": { + "External id": 513218,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108461.153, "dur": 2.979, + "args": { + "External id": 513219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108461.761, "dur": 1.883, + "args": { + "External id": 513220,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108463.168, "dur": 0.393, + "args": { + "External id": 513221,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145108464.862, "dur": 38.993, + "args": { + "External id": 513222,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108515.555, "dur": 39.153, + "args": { + "External id": 513223,"Record function id": 0, "Sequence number": 5516403, "Fwd thread id": 1, "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108516.647, "dur": 7.315, + "args": { + "External id": 513224,"Sequence number": 5516403, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6343 + } + }, + { + "ph": "f", "id": 284, "pid": 2338711, "tid": 2379440, "ts": 6274145108516.647, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108518.511, "dur": 5.316, + "args": { + "External id": 513225,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108522.231, "dur": 1.462, + "args": { + "External id": 513226,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145108527.226, "dur": 25.068, + "args": { + "External id": 513227,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108558.800, "dur": 7.900, + "args": { + "External id": 513228,"Record function id": 0, "Sequence number": 5516402, "Fwd thread id": 1, "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108559.640, "dur": 4.974, + "args": { + "External id": 513229,"Sequence number": 5516402, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6348 + } + }, + { + "ph": "f", "id": 285, "pid": 2338711, "tid": 2379440, "ts": 6274145108559.640, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108560.287, "dur": 4.104, + "args": { + "External id": 513230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108561.439, "dur": 2.425, + "args": { + "External id": 513231,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108562.892, "dur": 0.852, + "args": { + "External id": 513232,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145108571.108, "dur": 7.112, + "args": { + "External id": 513233,"Record function id": 0, "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145108572.440, "dur": 5.275, + "args": { + "External id": 513234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145108573.346, "dur": 4.032, + "args": { + "External id": 513235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145108576.147, "dur": 1.127, + "args": { + "External id": 513236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108581.788, "dur": 27.599, + "args": { + "External id": 513237,"Record function id": 0, "Sequence number": 5516401, "Fwd thread id": 1, "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108582.867, "dur": 2.863, + "args": { + "External id": 513238,"Sequence number": 5516401, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6357 + } + }, + { + "ph": "f", "id": 286, "pid": 2338711, "tid": 2379440, "ts": 6274145108582.867, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108583.707, "dur": 1.866, + "args": { + "External id": 513239,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108584.528, "dur": 0.954, + "args": { + "External id": 513240,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108617.639, "dur": 145.136, + "args": { + "External id": 513241,"Record function id": 0, "Sequence number": 5516400, "Fwd thread id": 1, "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108619.523, "dur": 127.613, + "args": { + "External id": 513242,"Sequence number": 5516400, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6361 + } + }, + { + "ph": "f", "id": 287, "pid": 2338711, "tid": 2379440, "ts": 6274145108619.523, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108623.470, "dur": 8.072, + "args": { + "External id": 513243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108627.134, "dur": 3.493, + "args": { + "External id": 513244,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108629.486, "dur": 0.942, + "args": { + "External id": 513245,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145108632.579, "dur": 52.219, + "args": { + "External id": 513246,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108686.383, "dur": 7.363, + "args": { + "External id": 513247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108690.446, "dur": 2.648, + "args": { + "External id": 513248,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108692.104, "dur": 0.891, + "args": { + "External id": 513249,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108694.900, "dur": 5.515, + "args": { + "External id": 513250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108698.077, "dur": 1.869, + "args": { + "External id": 513251,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108699.368, "dur": 0.486, + "args": { + "External id": 513252,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145108700.841, "dur": 42.041, + "args": { + "External id": 513253,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108768.442, "dur": 32.474, + "args": { + "External id": 513254,"Record function id": 0, "Sequence number": 5516399, "Fwd thread id": 1, "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108769.760, "dur": 5.382, + "args": { + "External id": 513255,"Sequence number": 5516399, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6374 + } + }, + { + "ph": "f", "id": 288, "pid": 2338711, "tid": 2379440, "ts": 6274145108769.760, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108771.638, "dur": 3.326, + "args": { + "External id": 513256,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108773.065, "dur": 1.736, + "args": { + "External id": 513257,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145108778.715, "dur": 20.047, + "args": { + "External id": 513258,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108804.829, "dur": 9.673, + "args": { + "External id": 513259,"Record function id": 0, "Sequence number": 5516398, "Fwd thread id": 1, "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145108807.694, "dur": 5.004, + "args": { + "External id": 513260,"Sequence number": 5516398, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6379 + } + }, + { + "ph": "f", "id": 289, "pid": 2338711, "tid": 2379440, "ts": 6274145108807.694, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145108808.536, "dur": 3.973, + "args": { + "External id": 513261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145108809.589, "dur": 2.375, + "args": { + "External id": 513262,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145108811.099, "dur": 0.770, + "args": { + "External id": 513263,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145108818.783, "dur": 6.899, + "args": { + "External id": 513264,"Record function id": 0, "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145108820.528, "dur": 4.668, + "args": { + "External id": 513265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145108822.022, "dur": 2.585, + "args": { + "External id": 513266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145108823.151, "dur": 1.364, + "args": { + "External id": 513267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145108830.296, "dur": 442.340, + "args": { + "External id": 513268,"Record function id": 0, "Sequence number": 5516397, "Fwd thread id": 1, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145108831.629, "dur": 405.360, + "args": { + "External id": 513269,"Sequence number": 5516397, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6388 + } + }, + { + "ph": "f", "id": 290, "pid": 2338711, "tid": 2379440, "ts": 6274145108831.629, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145108870.315, "dur": 4.581, + "args": { + "External id": 513270,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145108873.683, "dur": 1.078, + "args": { + "External id": 513271,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145108891.157, "dur": 4.577, + "args": { + "External id": 513272,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145108906.464, "dur": 2.082, + "args": { + "External id": 513273,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109121.422, "dur": 3.063, + "args": { + "External id": 513274,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145109129.353, "dur": 43.112, + "args": { + "External id": 513275,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109144.496, "dur": 2.789, + "args": { + "External id": 513276,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145109178.989, "dur": 34.025, + "args": { + "External id": 513277,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145109183.532, "dur": 29.276, + "args": { + "External id": 513278,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109188.188, "dur": 5.244, + "args": { + "External id": 513279,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145109195.405, "dur": 16.904, + "args": { + "External id": 513280,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145109218.388, "dur": 2.676, + "args": { + "External id": 513281,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109219.811, "dur": 1.154, + "args": { + "External id": 513282,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109227.716, "dur": 2.117, + "args": { + "External id": 513283,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109228.751, "dur": 0.941, + "args": { + "External id": 513284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145109248.707, "dur": 16.445, + "args": { + "External id": 513285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145109286.016, "dur": 11.396, + "args": { + "External id": 513286,"Record function id": 0, "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145109288.148, "dur": 8.427, + "args": { + "External id": 513287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145109290.421, "dur": 5.159, + "args": { + "External id": 513288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145109293.934, "dur": 1.568, + "args": { + "External id": 513289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109301.516, "dur": 6.983, + "args": { + "External id": 513290,"Record function id": 0, "Sequence number": 5516396, "Fwd thread id": 1, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109302.916, "dur": 1.523, + "args": { + "External id": 513291,"Sequence number": 5516396, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6410 + } + }, + { + "ph": "f", "id": 291, "pid": 2338711, "tid": 2379440, "ts": 6274145109302.916, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145109312.473, "dur": 475.399, + "args": { + "External id": 513292,"Record function id": 0, "Sequence number": 5516395, "Fwd thread id": 1, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145109313.980, "dur": 461.518, + "args": { + "External id": 513293,"Sequence number": 5516395, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6412 + } + }, + { + "ph": "f", "id": 292, "pid": 2338711, "tid": 2379440, "ts": 6274145109313.980, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109348.038, "dur": 9.895, + "args": { + "External id": 513294,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145109353.762, "dur": 3.902, + "args": { + "External id": 513295,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145109361.350, "dur": 7.877, + "args": { + "External id": 513296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145109365.767, "dur": 2.707, + "args": { + "External id": 513297,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109367.703, "dur": 0.605, + "args": { + "External id": 513298,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274145109372.740, "dur": 93.424, + "args": { + "External id": 513299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145109373.478, "dur": 3.764, + "args": { + "External id": 513300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145109374.285, "dur": 2.489, + "args": { + "External id": 513301,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109375.807, "dur": 0.881, + "args": { + "External id": 513302,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274145109380.913, "dur": 84.738, + "args": { + "External id": 513303,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145109382.636, "dur": 82.028, + "args": { + "External id": 513304,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145109469.881, "dur": 3.446, + "args": { + "External id": 513305,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109471.541, "dur": 1.659, + "args": { + "External id": 513306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145109506.569, "dur": 7.028, + "args": { + "External id": 513307,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145109515.123, "dur": 2.437, + "args": { + "External id": 513308,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145109518.760, "dur": 2.514, + "args": { + "External id": 513309,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109559.658, "dur": 2.647, + "args": { + "External id": 513310,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109560.986, "dur": 1.149, + "args": { + "External id": 513311,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274145109584.705, "dur": 169.014, + "args": { + "External id": 513312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145109610.283, "dur": 9.132, + "args": { + "External id": 513313,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109615.940, "dur": 1.608, + "args": { + "External id": 513314,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145109621.851, "dur": 7.873, + "args": { + "External id": 513315,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109628.066, "dur": 0.653, + "args": { + "External id": 513316,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145109631.775, "dur": 3.600, + "args": { + "External id": 513317,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109634.075, "dur": 0.696, + "args": { + "External id": 513318,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145109637.109, "dur": 4.393, + "args": { + "External id": 513319,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109639.856, "dur": 0.810, + "args": { + "External id": 513320,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145109648.935, "dur": 6.456, + "args": { + "External id": 513321,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109651.865, "dur": 2.730, + "args": { + "External id": 513322,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109656.679, "dur": 7.389, + "args": { + "External id": 513323,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145109661.528, "dur": 2.401, + "args": { + "External id": 513324,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145109665.705, "dur": 3.004, + "args": { + "External id": 513325,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109668.130, "dur": 0.318, + "args": { + "External id": 513326,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109669.576, "dur": 2.473, + "args": { + "External id": 513327,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109670.525, "dur": 1.430, + "args": { + "External id": 513328,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145109673.473, "dur": 65.899, + "args": { + "External id": 513329,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109743.761, "dur": 1.068, + "args": { + "External id": 513330,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145109746.356, "dur": 3.150, + "args": { + "External id": 513331,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109748.387, "dur": 0.538, + "args": { + "External id": 513332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109751.741, "dur": 0.795, + "args": { + "External id": 513333,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145109800.957, "dur": 10.002, + "args": { + "External id": 513334,"Record function id": 0, "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145109803.608, "dur": 6.629, + "args": { + "External id": 513335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145109805.805, "dur": 3.442, + "args": { + "External id": 513336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145109807.150, "dur": 1.983, + "args": { + "External id": 513337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109815.052, "dur": 10.025, + "args": { + "External id": 513338,"Record function id": 0, "Sequence number": 5516394, "Fwd thread id": 1, "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109816.232, "dur": 6.509, + "args": { + "External id": 513339,"Sequence number": 5516394, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6458 + } + }, + { + "ph": "f", "id": 293, "pid": 2338711, "tid": 2379440, "ts": 6274145109816.232, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109818.150, "dur": 4.374, + "args": { + "External id": 513340,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109821.637, "dur": 0.717, + "args": { + "External id": 513341,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109828.825, "dur": 117.248, + "args": { + "External id": 513342,"Record function id": 0, "Sequence number": 5516393, "Fwd thread id": 1, "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109830.061, "dur": 109.303, + "args": { + "External id": 513343,"Sequence number": 5516393, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6462 + } + }, + { + "ph": "f", "id": 294, "pid": 2338711, "tid": 2379440, "ts": 6274145109830.061, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145109833.625, "dur": 4.363, + "args": { + "External id": 513344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145109835.151, "dur": 2.245, + "args": { + "External id": 513345,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109836.801, "dur": 0.414, + "args": { + "External id": 513346,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145109841.769, "dur": 45.735, + "args": { + "External id": 513347,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145109888.867, "dur": 4.037, + "args": { + "External id": 513348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145109889.917, "dur": 2.454, + "args": { + "External id": 513349,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109891.275, "dur": 0.900, + "args": { + "External id": 513350,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145109894.821, "dur": 7.989, + "args": { + "External id": 513351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145109895.584, "dur": 6.748, + "args": { + "External id": 513352,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109899.544, "dur": 2.687, + "args": { + "External id": 513353,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145109903.858, "dur": 34.608, + "args": { + "External id": 513354,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109951.179, "dur": 6.219, + "args": { + "External id": 513355,"Record function id": 0, "Sequence number": 5516392, "Fwd thread id": 1, "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109952.148, "dur": 3.693, + "args": { + "External id": 513356,"Sequence number": 5516392, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6475 + } + }, + { + "ph": "f", "id": 295, "pid": 2338711, "tid": 2379440, "ts": 6274145109952.148, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109953.748, "dur": 1.950, + "args": { + "External id": 513357,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109954.591, "dur": 0.955, + "args": { + "External id": 513358,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109960.816, "dur": 9.449, + "args": { + "External id": 513359,"Record function id": 0, "Sequence number": 5516391, "Fwd thread id": 1, "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109961.713, "dur": 6.608, + "args": { + "External id": 513360,"Sequence number": 5516391, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6479 + } + }, + { + "ph": "f", "id": 296, "pid": 2338711, "tid": 2379440, "ts": 6274145109961.713, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145109962.384, "dur": 5.717, + "args": { + "External id": 513361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145109965.608, "dur": 2.001, + "args": { + "External id": 513362,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145109966.872, "dur": 0.563, + "args": { + "External id": 513363,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145109974.625, "dur": 6.292, + "args": { + "External id": 513364,"Record function id": 0, "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145109976.991, "dur": 3.452, + "args": { + "External id": 513365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145109978.033, "dur": 2.155, + "args": { + "External id": 513366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145109979.040, "dur": 1.061, + "args": { + "External id": 513367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109984.192, "dur": 5.944, + "args": { + "External id": 513368,"Record function id": 0, "Sequence number": 5516390, "Fwd thread id": 1, "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109985.427, "dur": 2.518, + "args": { + "External id": 513369,"Sequence number": 5516390, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6488 + } + }, + { + "ph": "f", "id": 297, "pid": 2338711, "tid": 2379440, "ts": 6274145109985.427, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145109986.326, "dur": 1.466, + "args": { + "External id": 513370,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145109987.018, "dur": 0.640, + "args": { + "External id": 513371,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109995.939, "dur": 141.827, + "args": { + "External id": 513372,"Record function id": 0, "Sequence number": 5516389, "Fwd thread id": 1, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145109996.638, "dur": 131.712, + "args": { + "External id": 513373,"Sequence number": 5516389, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6492 + } + }, + { + "ph": "f", "id": 298, "pid": 2338711, "tid": 2379440, "ts": 6274145109996.638, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145109999.097, "dur": 2.466, + "args": { + "External id": 513374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145109999.493, "dur": 1.602, + "args": { + "External id": 513375,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110000.603, "dur": 0.401, + "args": { + "External id": 513376,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145110002.348, "dur": 33.511, + "args": { + "External id": 513377,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145110036.964, "dur": 9.688, + "args": { + "External id": 513378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145110040.668, "dur": 5.416, + "args": { + "External id": 513379,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110042.107, "dur": 3.784, + "args": { + "External id": 513380,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145110047.784, "dur": 2.681, + "args": { + "External id": 513381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145110048.464, "dur": 1.572, + "args": { + "External id": 513382,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110049.644, "dur": 0.325, + "args": { + "External id": 513383,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145110051.177, "dur": 75.726, + "args": { + "External id": 513384,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110145.735, "dur": 36.808, + "args": { + "External id": 513385,"Record function id": 0, "Sequence number": 5516388, "Fwd thread id": 1, "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110148.593, "dur": 5.111, + "args": { + "External id": 513386,"Sequence number": 5516388, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6505 + } + }, + { + "ph": "f", "id": 299, "pid": 2338711, "tid": 2379440, "ts": 6274145110148.593, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110150.581, "dur": 2.986, + "args": { + "External id": 513387,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110151.717, "dur": 1.760, + "args": { + "External id": 513388,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145110156.821, "dur": 22.518, + "args": { + "External id": 513389,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110186.034, "dur": 8.454, + "args": { + "External id": 513390,"Record function id": 0, "Sequence number": 5516387, "Fwd thread id": 1, "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110187.035, "dur": 5.226, + "args": { + "External id": 513391,"Sequence number": 5516387, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6510 + } + }, + { + "ph": "f", "id": 300, "pid": 2338711, "tid": 2379440, "ts": 6274145110187.035, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145110188.312, "dur": 3.729, + "args": { + "External id": 513392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145110189.287, "dur": 2.127, + "args": { + "External id": 513393,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110190.760, "dur": 0.556, + "args": { + "External id": 513394,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145110199.117, "dur": 8.509, + "args": { + "External id": 513395,"Record function id": 0, "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145110200.311, "dur": 6.770, + "args": { + "External id": 513396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145110201.654, "dur": 5.033, + "args": { + "External id": 513397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145110204.910, "dur": 1.683, + "args": { + "External id": 513398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145110212.478, "dur": 475.743, + "args": { + "External id": 513399,"Record function id": 0, "Sequence number": 5516386, "Fwd thread id": 1, "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145110214.061, "dur": 423.593, + "args": { + "External id": 513400,"Sequence number": 5516386, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6519 + } + }, + { + "ph": "f", "id": 301, "pid": 2338711, "tid": 2379440, "ts": 6274145110214.061, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274145110240.572, "dur": 34.576, + "args": { + "External id": 513401,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145110242.197, "dur": 32.752, + "args": { + "External id": 513402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145110245.711, "dur": 6.489, + "args": { + "External id": 513403,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145110248.320, "dur": 3.383, + "args": { + "External id": 513404,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145110253.495, "dur": 20.904, + "args": { + "External id": 513405,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110286.775, "dur": 4.714, + "args": { + "External id": 513406,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110287.953, "dur": 3.382, + "args": { + "External id": 513407,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110298.186, "dur": 2.303, + "args": { + "External id": 513408,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110299.461, "dur": 0.927, + "args": { + "External id": 513409,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145110312.574, "dur": 2.472, + "args": { + "External id": 513410,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145110326.668, "dur": 2.129, + "args": { + "External id": 513411,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110499.716, "dur": 4.053, + "args": { + "External id": 513412,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145110508.797, "dur": 31.867, + "args": { + "External id": 513413,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110518.693, "dur": 0.923, + "args": { + "External id": 513414,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145110546.569, "dur": 26.977, + "args": { + "External id": 513415,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145110548.657, "dur": 24.685, + "args": { + "External id": 513416,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110553.026, "dur": 4.728, + "args": { + "External id": 513417,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145110559.128, "dur": 13.615, + "args": { + "External id": 513418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145110580.825, "dur": 2.615, + "args": { + "External id": 513419,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110582.308, "dur": 1.042, + "args": { + "External id": 513420,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110612.102, "dur": 4.438, + "args": { + "External id": 513421,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110613.799, "dur": 2.330, + "args": { + "External id": 513422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110620.286, "dur": 5.321, + "args": { + "External id": 513423,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110622.007, "dur": 3.398, + "args": { + "External id": 513424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145110660.677, "dur": 25.521, + "args": { + "External id": 513425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145110705.977, "dur": 13.384, + "args": { + "External id": 513426,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145110709.799, "dur": 8.398, + "args": { + "External id": 513427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145110712.882, "dur": 4.140, + "args": { + "External id": 513428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145110714.449, "dur": 2.403, + "args": { + "External id": 513429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110723.430, "dur": 9.173, + "args": { + "External id": 513430,"Record function id": 0, "Sequence number": 5516385, "Fwd thread id": 1, "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110724.751, "dur": 4.861, + "args": { + "External id": 513431,"Sequence number": 5516385, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6550 + } + }, + { + "ph": "f", "id": 302, "pid": 2338711, "tid": 2379440, "ts": 6274145110724.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110726.884, "dur": 2.518, + "args": { + "External id": 513432,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110728.111, "dur": 1.153, + "args": { + "External id": 513433,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110736.086, "dur": 151.456, + "args": { + "External id": 513434,"Record function id": 0, "Sequence number": 5516384, "Fwd thread id": 1, "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110739.302, "dur": 140.661, + "args": { + "External id": 513435,"Sequence number": 5516384, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6554 + } + }, + { + "ph": "f", "id": 303, "pid": 2338711, "tid": 2379440, "ts": 6274145110739.302, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145110743.274, "dur": 5.809, + "args": { + "External id": 513436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145110744.884, "dur": 3.555, + "args": { + "External id": 513437,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110746.916, "dur": 1.302, + "args": { + "External id": 513438,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145110750.434, "dur": 74.703, + "args": { + "External id": 513439,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145110826.538, "dur": 7.555, + "args": { + "External id": 513440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145110827.417, "dur": 5.848, + "args": { + "External id": 513441,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110831.416, "dur": 1.675, + "args": { + "External id": 513442,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145110835.544, "dur": 4.215, + "args": { + "External id": 513443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145110836.558, "dur": 2.667, + "args": { + "External id": 513444,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110838.476, "dur": 0.673, + "args": { + "External id": 513445,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145110840.375, "dur": 38.827, + "args": { + "External id": 513446,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110892.723, "dur": 8.506, + "args": { + "External id": 513447,"Record function id": 0, "Sequence number": 5516383, "Fwd thread id": 1, "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110893.661, "dur": 5.088, + "args": { + "External id": 513448,"Sequence number": 5516383, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6567 + } + }, + { + "ph": "f", "id": 304, "pid": 2338711, "tid": 2379440, "ts": 6274145110893.661, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110895.819, "dur": 2.803, + "args": { + "External id": 513449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110897.052, "dur": 1.439, + "args": { + "External id": 513450,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110904.649, "dur": 9.537, + "args": { + "External id": 513451,"Record function id": 0, "Sequence number": 5516382, "Fwd thread id": 1, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110905.429, "dur": 6.918, + "args": { + "External id": 513452,"Sequence number": 5516382, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6571 + } + }, + { + "ph": "f", "id": 305, "pid": 2338711, "tid": 2379440, "ts": 6274145110905.429, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145110908.578, "dur": 3.568, + "args": { + "External id": 513453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145110909.595, "dur": 2.034, + "args": { + "External id": 513454,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110911.108, "dur": 0.374, + "args": { + "External id": 513455,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145110917.953, "dur": 7.924, + "args": { + "External id": 513456,"Record function id": 0, "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145110919.816, "dur": 5.572, + "args": { + "External id": 513457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145110920.820, "dur": 4.281, + "args": { + "External id": 513458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145110921.498, "dur": 3.509, + "args": { + "External id": 513459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110929.054, "dur": 9.731, + "args": { + "External id": 513460,"Record function id": 0, "Sequence number": 5516381, "Fwd thread id": 1, "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145110930.069, "dur": 6.285, + "args": { + "External id": 513461,"Sequence number": 5516381, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6580 + } + }, + { + "ph": "f", "id": 306, "pid": 2338711, "tid": 2379440, "ts": 6274145110930.069, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145110931.181, "dur": 5.039, + "args": { + "External id": 513462,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145110934.682, "dur": 1.409, + "args": { + "External id": 513463,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145110943.105, "dur": 434.774, + "args": { + "External id": 513464,"Record function id": 0, "Sequence number": 5516380, "Fwd thread id": 1, "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145110944.612, "dur": 413.313, + "args": { + "External id": 513465,"Sequence number": 5516380, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6584 + } + }, + { + "ph": "f", "id": 307, "pid": 2338711, "tid": 2379440, "ts": 6274145110944.612, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145110962.864, "dur": 8.737, + "args": { + "External id": 513466,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110966.651, "dur": 4.496, + "args": { + "External id": 513467,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145110973.848, "dur": 3.521, + "args": { + "External id": 513468,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110975.597, "dur": 1.592, + "args": { + "External id": 513469,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145110978.943, "dur": 3.673, + "args": { + "External id": 513470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145110980.580, "dur": 1.853, + "args": { + "External id": 513471,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145111010.973, "dur": 317.614, + "args": { + "External id": 513472,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145111148.545, "dur": 5.157, + "args": { + "External id": 513473,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145111156.469, "dur": 3.125, + "args": { + "External id": 513474,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145111161.316, "dur": 2.215, + "args": { + "External id": 513475,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145111165.018, "dur": 5.911, + "args": { + "External id": 513476,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145111226.460, "dur": 3.607, + "args": { + "External id": 513477,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145111228.162, "dur": 1.795, + "args": { + "External id": 513478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145111231.674, "dur": 30.801, + "args": { + "External id": 513479,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145111238.102, "dur": 2.310, + "args": { + "External id": 513480,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145111264.170, "dur": 2.190, + "args": { + "External id": 513481,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145111265.608, "dur": 0.680, + "args": { + "External id": 513482,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145111267.815, "dur": 14.834, + "args": { + "External id": 513483,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145111270.348, "dur": 0.599, + "args": { + "External id": 513484,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145111343.891, "dur": 4.278, + "args": { + "External id": 513485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145111351.162, "dur": 0.892, + "args": { + "External id": 513486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145111353.925, "dur": 1.060, + "args": { + "External id": 513487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145111388.974, "dur": 270.722, + "args": { + "External id": 513488,"Record function id": 0, "Sequence number": 5516379, "Fwd thread id": 1, "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145111390.989, "dur": 257.360, + "args": { + "External id": 513489,"Sequence number": 5516379, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6608 + } + }, + { + "ph": "f", "id": 308, "pid": 2338711, "tid": 2379440, "ts": 6274145111390.989, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145111415.018, "dur": 42.691, + "args": { + "External id": 513490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145111418.510, "dur": 3.624, + "args": { + "External id": 513491,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145111423.634, "dur": 33.304, + "args": { + "External id": 513492,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145111468.630, "dur": 5.604, + "args": { + "External id": 513493,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145111471.376, "dur": 2.513, + "args": { + "External id": 513494,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145111674.645, "dur": 194.222, + "args": { + "External id": 513495,"Record function id": 0, "Sequence number": 5516378, "Fwd thread id": 1, "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145111677.644, "dur": 183.291, + "args": { + "External id": 513496,"Sequence number": 5516378, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6615 + } + }, + { + "ph": "f", "id": 309, "pid": 2338711, "tid": 2379440, "ts": 6274145111677.644, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145111692.592, "dur": 52.159, + "args": { + "External id": 513497,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145111696.937, "dur": 5.151, + "args": { + "External id": 513498,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145111703.241, "dur": 40.749, + "args": { + "External id": 513499,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145111753.741, "dur": 5.576, + "args": { + "External id": 513500,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145111756.508, "dur": 2.500, + "args": { + "External id": 513501,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111876.418, "dur": 17.921, + "args": { + "External id": 513502,"Record function id": 0, "Sequence number": 5516377, "Fwd thread id": 1, "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111880.401, "dur": 11.013, + "args": { + "External id": 513503,"Sequence number": 5516377, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6622 + } + }, + { + "ph": "f", "id": 310, "pid": 2338711, "tid": 2379440, "ts": 6274145111880.401, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145111883.111, "dur": 7.940, + "args": { + "External id": 513504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145111884.947, "dur": 5.873, + "args": { + "External id": 513505,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111898.306, "dur": 7.023, + "args": { + "External id": 513506,"Record function id": 0, "Sequence number": 5516376, "Fwd thread id": 1, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111899.571, "dur": 3.560, + "args": { + "External id": 513507,"Sequence number": 5516376, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6626 + } + }, + { + "ph": "f", "id": 311, "pid": 2338711, "tid": 2379440, "ts": 6274145111899.571, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145111900.712, "dur": 2.271, + "args": { + "External id": 513508,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145111901.938, "dur": 0.932, + "args": { + "External id": 513509,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111908.947, "dur": 9.031, + "args": { + "External id": 513510,"Record function id": 0, "Sequence number": 5516375, "Fwd thread id": 1, "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111909.860, "dur": 6.000, + "args": { + "External id": 513511,"Sequence number": 5516375, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6630 + } + }, + { + "ph": "f", "id": 312, "pid": 2338711, "tid": 2379440, "ts": 6274145111909.860, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145111911.315, "dur": 4.402, + "args": { + "External id": 513512,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145111914.474, "dur": 1.107, + "args": { + "External id": 513513,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111922.454, "dur": 9.176, + "args": { + "External id": 513514,"Record function id": 0, "Sequence number": 5516374, "Fwd thread id": 1, "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111923.603, "dur": 5.524, + "args": { + "External id": 513515,"Sequence number": 5516374, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6634 + } + }, + { + "ph": "f", "id": 313, "pid": 2338711, "tid": 2379440, "ts": 6274145111923.603, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145111924.867, "dur": 4.095, + "args": { + "External id": 513516,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145111925.685, "dur": 3.136, + "args": { + "External id": 513517,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111938.644, "dur": 230.808, + "args": { + "External id": 513518,"Record function id": 0, "Sequence number": 5516373, "Fwd thread id": 1, "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145111939.628, "dur": 220.164, + "args": { + "External id": 513519,"Sequence number": 5516373, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6638 + } + }, + { + "ph": "f", "id": 314, "pid": 2338711, "tid": 2379440, "ts": 6274145111939.628, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145111943.810, "dur": 10.469, + "args": { + "External id": 513520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145111946.519, "dur": 7.061, + "args": { + "External id": 513521,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145111951.237, "dur": 2.033, + "args": { + "External id": 513522,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145111956.214, "dur": 81.229, + "args": { + "External id": 513523,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112038.647, "dur": 10.717, + "args": { + "External id": 513524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112039.834, "dur": 8.741, + "args": { + "External id": 513525,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112046.365, "dur": 2.031, + "args": { + "External id": 513526,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112051.075, "dur": 45.850, + "args": { + "External id": 513527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112054.621, "dur": 41.456, + "args": { + "External id": 513528,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112094.657, "dur": 0.859, + "args": { + "External id": 513529,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145112098.292, "dur": 60.097, + "args": { + "External id": 513530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112177.759, "dur": 9.094, + "args": { + "External id": 513531,"Record function id": 0, "Sequence number": 5516372, "Fwd thread id": 1, "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112178.735, "dur": 5.840, + "args": { + "External id": 513532,"Sequence number": 5516372, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6651 + } + }, + { + "ph": "f", "id": 315, "pid": 2338711, "tid": 2379440, "ts": 6274145112178.735, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145112181.446, "dur": 3.000, + "args": { + "External id": 513533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112182.618, "dur": 1.711, + "args": { + "External id": 513534,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112190.546, "dur": 39.726, + "args": { + "External id": 513535,"Record function id": 0, "Sequence number": 5516371, "Fwd thread id": 1, "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112193.928, "dur": 33.803, + "args": { + "External id": 513536,"Sequence number": 5516371, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6655 + } + }, + { + "ph": "f", "id": 316, "pid": 2338711, "tid": 2379440, "ts": 6274145112193.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112222.872, "dur": 4.645, + "args": { + "External id": 513537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112223.926, "dur": 3.054, + "args": { + "External id": 513538,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112226.397, "dur": 0.362, + "args": { + "External id": 513539,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145112236.634, "dur": 12.065, + "args": { + "External id": 513540,"Record function id": 0, "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145112238.130, "dur": 9.586, + "args": { + "External id": 513541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145112241.040, "dur": 6.201, + "args": { + "External id": 513542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145112242.300, "dur": 4.838, + "args": { + "External id": 513543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112251.887, "dur": 8.715, + "args": { + "External id": 513544,"Record function id": 0, "Sequence number": 5516370, "Fwd thread id": 1, "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112253.022, "dur": 5.097, + "args": { + "External id": 513545,"Sequence number": 5516370, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6664 + } + }, + { + "ph": "f", "id": 317, "pid": 2338711, "tid": 2379440, "ts": 6274145112253.022, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145112253.895, "dur": 4.080, + "args": { + "External id": 513546,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112256.750, "dur": 1.097, + "args": { + "External id": 513547,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112263.780, "dur": 98.831, + "args": { + "External id": 513548,"Record function id": 0, "Sequence number": 5516369, "Fwd thread id": 1, "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112264.562, "dur": 90.231, + "args": { + "External id": 513549,"Sequence number": 5516369, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6668 + } + }, + { + "ph": "f", "id": 318, "pid": 2338711, "tid": 2379440, "ts": 6274145112264.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112266.697, "dur": 2.408, + "args": { + "External id": 513550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112267.258, "dur": 1.387, + "args": { + "External id": 513551,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112268.181, "dur": 0.333, + "args": { + "External id": 513552,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145112272.203, "dur": 32.772, + "args": { + "External id": 513553,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112306.015, "dur": 3.088, + "args": { + "External id": 513554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112306.583, "dur": 1.954, + "args": { + "External id": 513555,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112307.960, "dur": 0.473, + "args": { + "External id": 513556,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112310.386, "dur": 5.658, + "args": { + "External id": 513557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112311.092, "dur": 4.481, + "args": { + "External id": 513558,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112315.119, "dur": 0.391, + "args": { + "External id": 513559,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145112316.487, "dur": 37.648, + "args": { + "External id": 513560,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112367.473, "dur": 38.291, + "args": { + "External id": 513561,"Record function id": 0, "Sequence number": 5516368, "Fwd thread id": 1, "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112368.435, "dur": 4.107, + "args": { + "External id": 513562,"Sequence number": 5516368, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6681 + } + }, + { + "ph": "f", "id": 319, "pid": 2338711, "tid": 2379440, "ts": 6274145112368.435, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145112370.361, "dur": 2.044, + "args": { + "External id": 513563,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112371.231, "dur": 1.037, + "args": { + "External id": 513564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145112375.786, "dur": 26.907, + "args": { + "External id": 513565,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112409.706, "dur": 10.106, + "args": { + "External id": 513566,"Record function id": 0, "Sequence number": 5516367, "Fwd thread id": 1, "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112410.490, "dur": 7.041, + "args": { + "External id": 513567,"Sequence number": 5516367, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6686 + } + }, + { + "ph": "f", "id": 320, "pid": 2338711, "tid": 2379440, "ts": 6274145112410.490, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112411.339, "dur": 5.982, + "args": { + "External id": 513568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112412.390, "dur": 4.372, + "args": { + "External id": 513569,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112416.131, "dur": 0.506, + "args": { + "External id": 513570,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145112423.979, "dur": 4.933, + "args": { + "External id": 513571,"Record function id": 0, "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145112425.584, "dur": 2.858, + "args": { + "External id": 513572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145112426.566, "dur": 1.600, + "args": { + "External id": 513573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145112427.088, "dur": 0.945, + "args": { + "External id": 513574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112432.253, "dur": 6.268, + "args": { + "External id": 513575,"Record function id": 0, "Sequence number": 5516366, "Fwd thread id": 1, "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112433.368, "dur": 2.974, + "args": { + "External id": 513576,"Sequence number": 5516366, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6695 + } + }, + { + "ph": "f", "id": 321, "pid": 2338711, "tid": 2379440, "ts": 6274145112433.368, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145112434.231, "dur": 1.967, + "args": { + "External id": 513577,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112435.236, "dur": 0.862, + "args": { + "External id": 513578,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112441.788, "dur": 102.415, + "args": { + "External id": 513579,"Record function id": 0, "Sequence number": 5516365, "Fwd thread id": 1, "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112445.119, "dur": 91.127, + "args": { + "External id": 513580,"Sequence number": 5516365, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6699 + } + }, + { + "ph": "f", "id": 322, "pid": 2338711, "tid": 2379440, "ts": 6274145112445.119, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112447.262, "dur": 2.638, + "args": { + "External id": 513581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112447.891, "dur": 1.547, + "args": { + "External id": 513582,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112448.988, "dur": 0.330, + "args": { + "External id": 513583,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145112450.479, "dur": 37.840, + "args": { + "External id": 513584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112489.272, "dur": 5.493, + "args": { + "External id": 513585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112489.873, "dur": 4.312, + "args": { + "External id": 513586,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112493.568, "dur": 0.505, + "args": { + "External id": 513587,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112495.857, "dur": 3.298, + "args": { + "External id": 513588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112496.681, "dur": 2.051, + "args": { + "External id": 513589,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112498.388, "dur": 0.277, + "args": { + "External id": 513590,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145112499.552, "dur": 35.916, + "args": { + "External id": 513591,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112548.824, "dur": 26.631, + "args": { + "External id": 513592,"Record function id": 0, "Sequence number": 5516364, "Fwd thread id": 1, "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112549.665, "dur": 6.167, + "args": { + "External id": 513593,"Sequence number": 5516364, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6712 + } + }, + { + "ph": "f", "id": 323, "pid": 2338711, "tid": 2379440, "ts": 6274145112549.665, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145112553.415, "dur": 2.248, + "args": { + "External id": 513594,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112554.543, "dur": 1.019, + "args": { + "External id": 513595,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145112558.292, "dur": 15.016, + "args": { + "External id": 513596,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112579.175, "dur": 24.102, + "args": { + "External id": 513597,"Record function id": 0, "Sequence number": 5516363, "Fwd thread id": 1, "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145112580.097, "dur": 3.969, + "args": { + "External id": 513598,"Sequence number": 5516363, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6717 + } + }, + { + "ph": "f", "id": 324, "pid": 2338711, "tid": 2379440, "ts": 6274145112580.097, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145112580.766, "dur": 3.098, + "args": { + "External id": 513599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145112581.624, "dur": 1.747, + "args": { + "External id": 513600,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112582.768, "dur": 0.511, + "args": { + "External id": 513601,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145112613.612, "dur": 11.766, + "args": { + "External id": 513602,"Record function id": 0, "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145112616.245, "dur": 8.171, + "args": { + "External id": 513603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145112618.153, "dur": 5.533, + "args": { + "External id": 513604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145112621.568, "dur": 1.963, + "args": { + "External id": 513605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145112631.498, "dur": 390.953, + "args": { + "External id": 513606,"Record function id": 0, "Sequence number": 5516362, "Fwd thread id": 1, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145112633.548, "dur": 358.296, + "args": { + "External id": 513607,"Sequence number": 5516362, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6726 + } + }, + { + "ph": "f", "id": 325, "pid": 2338711, "tid": 2379440, "ts": 6274145112633.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145112677.889, "dur": 3.622, + "args": { + "External id": 513608,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112679.723, "dur": 1.568, + "args": { + "External id": 513609,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145112697.653, "dur": 5.252, + "args": { + "External id": 513610,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145112713.452, "dur": 2.451, + "args": { + "External id": 513611,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112884.013, "dur": 2.025, + "args": { + "External id": 513612,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145112890.096, "dur": 40.442, + "args": { + "External id": 513613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112904.443, "dur": 1.332, + "args": { + "External id": 513614,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145112936.157, "dur": 30.296, + "args": { + "External id": 513615,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145112938.098, "dur": 28.146, + "args": { + "External id": 513616,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145112942.382, "dur": 4.584, + "args": { + "External id": 513617,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145112948.900, "dur": 16.680, + "args": { + "External id": 513618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145112971.591, "dur": 2.657, + "args": { + "External id": 513619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112972.982, "dur": 1.121, + "args": { + "External id": 513620,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145112982.361, "dur": 2.358, + "args": { + "External id": 513621,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145112983.742, "dur": 0.838, + "args": { + "External id": 513622,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145113003.229, "dur": 15.440, + "args": { + "External id": 513623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113031.994, "dur": 8.722, + "args": { + "External id": 513624,"Record function id": 0, "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113034.136, "dur": 5.713, + "args": { + "External id": 513625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113036.391, "dur": 2.505, + "args": { + "External id": 513626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113037.469, "dur": 1.307, + "args": { + "External id": 513627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113045.138, "dur": 9.325, + "args": { + "External id": 513628,"Record function id": 0, "Sequence number": 5516361, "Fwd thread id": 1, "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113049.156, "dur": 1.696, + "args": { + "External id": 513629,"Sequence number": 5516361, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6748 + } + }, + { + "ph": "f", "id": 326, "pid": 2338711, "tid": 2379440, "ts": 6274145113049.156, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145113104.615, "dur": 438.602, + "args": { + "External id": 513630,"Record function id": 0, "Sequence number": 5516360, "Fwd thread id": 1, "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145113109.414, "dur": 422.949, + "args": { + "External id": 513631,"Sequence number": 5516360, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6750 + } + }, + { + "ph": "f", "id": 327, "pid": 2338711, "tid": 2379440, "ts": 6274145113109.414, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113144.376, "dur": 10.559, + "args": { + "External id": 513632,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145113150.151, "dur": 4.302, + "args": { + "External id": 513633,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113158.307, "dur": 7.878, + "args": { + "External id": 513634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113160.248, "dur": 5.149, + "args": { + "External id": 513635,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113164.317, "dur": 0.894, + "args": { + "External id": 513636,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274145113169.844, "dur": 104.502, + "args": { + "External id": 513637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113170.861, "dur": 3.209, + "args": { + "External id": 513638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113171.754, "dur": 1.769, + "args": { + "External id": 513639,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113172.850, "dur": 0.568, + "args": { + "External id": 513640,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274145113175.190, "dur": 98.481, + "args": { + "External id": 513641,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145113179.178, "dur": 93.409, + "args": { + "External id": 513642,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145113278.576, "dur": 3.953, + "args": { + "External id": 513643,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113280.607, "dur": 1.782, + "args": { + "External id": 513644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145113316.700, "dur": 5.005, + "args": { + "External id": 513645,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145113323.144, "dur": 6.338, + "args": { + "External id": 513646,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145113330.585, "dur": 1.631, + "args": { + "External id": 513647,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113367.589, "dur": 2.327, + "args": { + "External id": 513648,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113368.414, "dur": 1.338, + "args": { + "External id": 513649,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274145113392.238, "dur": 122.250, + "args": { + "External id": 513650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145113398.389, "dur": 5.747, + "args": { + "External id": 513651,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113402.333, "dur": 0.981, + "args": { + "External id": 513652,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145113405.949, "dur": 5.950, + "args": { + "External id": 513653,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113410.679, "dur": 0.510, + "args": { + "External id": 513654,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145113413.312, "dur": 2.520, + "args": { + "External id": 513655,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113414.792, "dur": 0.588, + "args": { + "External id": 513656,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145113416.939, "dur": 3.213, + "args": { + "External id": 513657,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113418.935, "dur": 0.766, + "args": { + "External id": 513658,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145113426.305, "dur": 2.855, + "args": { + "External id": 513659,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113428.172, "dur": 0.697, + "args": { + "External id": 513660,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113430.336, "dur": 8.651, + "args": { + "External id": 513661,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145113434.232, "dur": 4.558, + "args": { + "External id": 513662,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145113439.912, "dur": 2.295, + "args": { + "External id": 513663,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113441.533, "dur": 0.408, + "args": { + "External id": 513664,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113443.473, "dur": 2.678, + "args": { + "External id": 513665,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113444.366, "dur": 1.702, + "args": { + "External id": 513666,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145113447.541, "dur": 53.406, + "args": { + "External id": 513667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113505.312, "dur": 1.074, + "args": { + "External id": 513668,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145113507.445, "dur": 2.991, + "args": { + "External id": 513669,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113509.202, "dur": 0.470, + "args": { + "External id": 513670,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113512.459, "dur": 0.882, + "args": { + "External id": 513671,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113552.620, "dur": 8.727, + "args": { + "External id": 513672,"Record function id": 0, "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113554.744, "dur": 5.936, + "args": { + "External id": 513673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113556.799, "dur": 3.068, + "args": { + "External id": 513674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113557.907, "dur": 1.837, + "args": { + "External id": 513675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113565.220, "dur": 9.860, + "args": { + "External id": 513676,"Record function id": 0, "Sequence number": 5516359, "Fwd thread id": 1, "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113566.192, "dur": 6.717, + "args": { + "External id": 513677,"Sequence number": 5516359, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6796 + } + }, + { + "ph": "f", "id": 328, "pid": 2338711, "tid": 2379440, "ts": 6274145113566.192, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113568.547, "dur": 4.144, + "args": { + "External id": 513678,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113571.804, "dur": 0.750, + "args": { + "External id": 513679,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113578.652, "dur": 157.386, + "args": { + "External id": 513680,"Record function id": 0, "Sequence number": 5516358, "Fwd thread id": 1, "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113579.326, "dur": 147.906, + "args": { + "External id": 513681,"Sequence number": 5516358, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6800 + } + }, + { + "ph": "f", "id": 329, "pid": 2338711, "tid": 2379440, "ts": 6274145113579.326, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113582.903, "dur": 25.290, + "args": { + "External id": 513682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113584.132, "dur": 22.964, + "args": { + "External id": 513683,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113585.710, "dur": 20.440, + "args": { + "External id": 513684,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145113613.427, "dur": 62.674, + "args": { + "External id": 513685,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113677.710, "dur": 4.978, + "args": { + "External id": 513686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113678.785, "dur": 3.171, + "args": { + "External id": 513687,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113680.854, "dur": 0.931, + "args": { + "External id": 513688,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113684.120, "dur": 5.861, + "args": { + "External id": 513689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113685.088, "dur": 4.177, + "args": { + "External id": 513690,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113688.886, "dur": 0.313, + "args": { + "External id": 513691,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145113690.733, "dur": 35.546, + "args": { + "External id": 513692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113743.731, "dur": 7.832, + "args": { + "External id": 513693,"Record function id": 0, "Sequence number": 5516357, "Fwd thread id": 1, "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113744.950, "dur": 4.858, + "args": { + "External id": 513694,"Sequence number": 5516357, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6813 + } + }, + { + "ph": "f", "id": 330, "pid": 2338711, "tid": 2379440, "ts": 6274145113744.950, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113747.046, "dur": 2.616, + "args": { + "External id": 513695,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113748.172, "dur": 1.351, + "args": { + "External id": 513696,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113755.235, "dur": 9.670, + "args": { + "External id": 513697,"Record function id": 0, "Sequence number": 5516356, "Fwd thread id": 1, "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113756.154, "dur": 6.888, + "args": { + "External id": 513698,"Sequence number": 5516356, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6817 + } + }, + { + "ph": "f", "id": 331, "pid": 2338711, "tid": 2379440, "ts": 6274145113756.154, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113757.162, "dur": 5.695, + "args": { + "External id": 513699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113758.282, "dur": 4.007, + "args": { + "External id": 513700,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113761.493, "dur": 0.685, + "args": { + "External id": 513701,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113769.056, "dur": 6.081, + "args": { + "External id": 513702,"Record function id": 0, "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113770.433, "dur": 4.156, + "args": { + "External id": 513703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113771.894, "dur": 2.411, + "args": { + "External id": 513704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113772.704, "dur": 1.500, + "args": { + "External id": 513705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113778.389, "dur": 6.939, + "args": { + "External id": 513706,"Record function id": 0, "Sequence number": 5516355, "Fwd thread id": 1, "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113779.685, "dur": 3.652, + "args": { + "External id": 513707,"Sequence number": 5516355, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6826 + } + }, + { + "ph": "f", "id": 332, "pid": 2338711, "tid": 2379440, "ts": 6274145113779.685, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113781.120, "dur": 2.078, + "args": { + "External id": 513708,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113782.126, "dur": 0.983, + "args": { + "External id": 513709,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113788.344, "dur": 99.095, + "args": { + "External id": 513710,"Record function id": 0, "Sequence number": 5516354, "Fwd thread id": 1, "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113789.282, "dur": 90.114, + "args": { + "External id": 513711,"Sequence number": 5516354, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6830 + } + }, + { + "ph": "f", "id": 333, "pid": 2338711, "tid": 2379440, "ts": 6274145113789.282, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113793.341, "dur": 5.329, + "args": { + "External id": 513712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113794.111, "dur": 4.072, + "args": { + "External id": 513713,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113795.328, "dur": 2.720, + "args": { + "External id": 513714,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145113799.166, "dur": 34.083, + "args": { + "External id": 513715,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113834.466, "dur": 6.550, + "args": { + "External id": 513716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113835.233, "dur": 5.231, + "args": { + "External id": 513717,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113839.280, "dur": 1.076, + "args": { + "External id": 513718,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113842.279, "dur": 3.765, + "args": { + "External id": 513719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113843.226, "dur": 2.377, + "args": { + "External id": 513720,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113844.965, "dur": 0.573, + "args": { + "External id": 513721,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145113846.604, "dur": 31.937, + "args": { + "External id": 513722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113891.892, "dur": 36.803, + "args": { + "External id": 513723,"Record function id": 0, "Sequence number": 5516353, "Fwd thread id": 1, "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113892.832, "dur": 6.257, + "args": { + "External id": 513724,"Sequence number": 5516353, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6843 + } + }, + { + "ph": "f", "id": 334, "pid": 2338711, "tid": 2379440, "ts": 6274145113892.832, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145113894.871, "dur": 4.087, + "args": { + "External id": 513725,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145113897.599, "dur": 1.217, + "args": { + "External id": 513726,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145113902.212, "dur": 23.792, + "args": { + "External id": 513727,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113932.406, "dur": 8.088, + "args": { + "External id": 513728,"Record function id": 0, "Sequence number": 5516352, "Fwd thread id": 1, "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145113933.515, "dur": 4.733, + "args": { + "External id": 513729,"Sequence number": 5516352, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6848 + } + }, + { + "ph": "f", "id": 335, "pid": 2338711, "tid": 2379440, "ts": 6274145113933.515, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145113934.634, "dur": 3.384, + "args": { + "External id": 513730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145113935.533, "dur": 1.984, + "args": { + "External id": 513731,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145113936.984, "dur": 0.423, + "args": { + "External id": 513732,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113944.553, "dur": 10.387, + "args": { + "External id": 513733,"Record function id": 0, "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145113946.261, "dur": 8.222, + "args": { + "External id": 513734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113947.517, "dur": 6.442, + "args": { + "External id": 513735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145113950.816, "dur": 3.026, + "args": { + "External id": 513736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145113959.243, "dur": 483.898, + "args": { + "External id": 513737,"Record function id": 0, "Sequence number": 5516351, "Fwd thread id": 1, "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145113960.834, "dur": 444.657, + "args": { + "External id": 513738,"Sequence number": 5516351, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6857 + } + }, + { + "ph": "f", "id": 336, "pid": 2338711, "tid": 2379440, "ts": 6274145113960.834, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274145113986.745, "dur": 39.622, + "args": { + "External id": 513739,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145113988.650, "dur": 37.518, + "args": { + "External id": 513740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145113991.692, "dur": 11.024, + "args": { + "External id": 513741,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145113999.197, "dur": 3.009, + "args": { + "External id": 513742,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145114004.201, "dur": 21.425, + "args": { + "External id": 513743,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114038.565, "dur": 2.260, + "args": { + "External id": 513744,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114039.357, "dur": 1.304, + "args": { + "External id": 513745,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114046.964, "dur": 1.854, + "args": { + "External id": 513746,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114047.817, "dur": 0.893, + "args": { + "External id": 513747,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145114098.110, "dur": 4.084, + "args": { + "External id": 513748,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145114114.769, "dur": 2.525, + "args": { + "External id": 513749,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114286.144, "dur": 3.877, + "args": { + "External id": 513750,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145114294.648, "dur": 39.621, + "args": { + "External id": 513751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114305.065, "dur": 3.841, + "args": { + "External id": 513752,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145114339.921, "dur": 31.869, + "args": { + "External id": 513753,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145114341.897, "dur": 29.688, + "args": { + "External id": 513754,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114346.587, "dur": 5.499, + "args": { + "External id": 513755,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145114353.629, "dur": 17.304, + "args": { + "External id": 513756,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145114376.424, "dur": 5.326, + "args": { + "External id": 513757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114380.345, "dur": 1.279, + "args": { + "External id": 513758,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114388.753, "dur": 2.444, + "args": { + "External id": 513759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114389.813, "dur": 1.273, + "args": { + "External id": 513760,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114393.990, "dur": 2.547, + "args": { + "External id": 513761,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114394.794, "dur": 1.658, + "args": { + "External id": 513762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145114421.461, "dur": 20.087, + "args": { + "External id": 513763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145114457.069, "dur": 12.545, + "args": { + "External id": 513764,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145114459.764, "dur": 9.137, + "args": { + "External id": 513765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145114462.374, "dur": 5.483, + "args": { + "External id": 513766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145114465.999, "dur": 1.691, + "args": { + "External id": 513767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114473.313, "dur": 8.529, + "args": { + "External id": 513768,"Record function id": 0, "Sequence number": 5516350, "Fwd thread id": 1, "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114474.688, "dur": 4.259, + "args": { + "External id": 513769,"Sequence number": 5516350, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6888 + } + }, + { + "ph": "f", "id": 337, "pid": 2338711, "tid": 2379440, "ts": 6274145114474.688, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114476.541, "dur": 2.167, + "args": { + "External id": 513770,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114477.343, "dur": 1.213, + "args": { + "External id": 513771,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114485.061, "dur": 190.588, + "args": { + "External id": 513772,"Record function id": 0, "Sequence number": 5516349, "Fwd thread id": 1, "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114485.937, "dur": 177.905, + "args": { + "External id": 513773,"Sequence number": 5516349, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6892 + } + }, + { + "ph": "f", "id": 338, "pid": 2338711, "tid": 2379440, "ts": 6274145114485.937, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145114489.088, "dur": 7.501, + "args": { + "External id": 513774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145114490.577, "dur": 5.436, + "args": { + "External id": 513775,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114494.741, "dur": 1.030, + "args": { + "External id": 513776,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145114497.741, "dur": 71.641, + "args": { + "External id": 513777,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145114570.888, "dur": 5.712, + "args": { + "External id": 513778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145114571.579, "dur": 4.423, + "args": { + "External id": 513779,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114573.791, "dur": 2.044, + "args": { + "External id": 513780,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145114577.935, "dur": 5.805, + "args": { + "External id": 513781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145114581.494, "dur": 1.761, + "args": { + "External id": 513782,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114582.745, "dur": 0.423, + "args": { + "External id": 513783,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145114584.315, "dur": 77.719, + "args": { + "External id": 513784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114686.543, "dur": 14.395, + "args": { + "External id": 513785,"Record function id": 0, "Sequence number": 5516348, "Fwd thread id": 1, "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114688.145, "dur": 10.599, + "args": { + "External id": 513786,"Sequence number": 5516348, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6905 + } + }, + { + "ph": "f", "id": 339, "pid": 2338711, "tid": 2379440, "ts": 6274145114688.145, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114691.391, "dur": 7.171, + "args": { + "External id": 513787,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114693.440, "dur": 4.832, + "args": { + "External id": 513788,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114705.949, "dur": 14.435, + "args": { + "External id": 513789,"Record function id": 0, "Sequence number": 5516347, "Fwd thread id": 1, "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114707.284, "dur": 10.193, + "args": { + "External id": 513790,"Sequence number": 5516347, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6909 + } + }, + { + "ph": "f", "id": 340, "pid": 2338711, "tid": 2379440, "ts": 6274145114707.284, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145114709.145, "dur": 8.096, + "args": { + "External id": 513791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145114713.020, "dur": 3.584, + "args": { + "External id": 513792,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114715.770, "dur": 0.599, + "args": { + "External id": 513793,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145114724.910, "dur": 7.340, + "args": { + "External id": 513794,"Record function id": 0, "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145114726.775, "dur": 4.965, + "args": { + "External id": 513795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145114728.569, "dur": 2.778, + "args": { + "External id": 513796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145114729.652, "dur": 1.594, + "args": { + "External id": 513797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114735.084, "dur": 6.625, + "args": { + "External id": 513798,"Record function id": 0, "Sequence number": 5516346, "Fwd thread id": 1, "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145114736.093, "dur": 3.507, + "args": { + "External id": 513799,"Sequence number": 5516346, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6918 + } + }, + { + "ph": "f", "id": 341, "pid": 2338711, "tid": 2379440, "ts": 6274145114736.093, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114736.946, "dur": 2.502, + "args": { + "External id": 513800,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114738.056, "dur": 1.253, + "args": { + "External id": 513801,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145114746.119, "dur": 423.954, + "args": { + "External id": 513802,"Record function id": 0, "Sequence number": 5516345, "Fwd thread id": 1, "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145114747.663, "dur": 401.262, + "args": { + "External id": 513803,"Sequence number": 5516345, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6922 + } + }, + { + "ph": "f", "id": 342, "pid": 2338711, "tid": 2379440, "ts": 6274145114747.663, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145114768.005, "dur": 9.255, + "args": { + "External id": 513804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114771.816, "dur": 4.890, + "args": { + "External id": 513805,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145114779.547, "dur": 3.826, + "args": { + "External id": 513806,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114781.525, "dur": 1.659, + "args": { + "External id": 513807,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145114784.869, "dur": 4.104, + "args": { + "External id": 513808,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114786.610, "dur": 2.191, + "args": { + "External id": 513809,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145114816.900, "dur": 302.279, + "args": { + "External id": 513810,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145114903.592, "dur": 3.541, + "args": { + "External id": 513811,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145114909.446, "dur": 4.637, + "args": { + "External id": 513812,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145114915.484, "dur": 2.787, + "args": { + "External id": 513813,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145114919.782, "dur": 2.672, + "args": { + "External id": 513814,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145114974.961, "dur": 3.214, + "args": { + "External id": 513815,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145114976.298, "dur": 1.781, + "args": { + "External id": 513816,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145114982.234, "dur": 30.765, + "args": { + "External id": 513817,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145114987.881, "dur": 2.321, + "args": { + "External id": 513818,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145115014.395, "dur": 1.664, + "args": { + "External id": 513819,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145115015.151, "dur": 0.814, + "args": { + "External id": 513820,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145115016.863, "dur": 16.126, + "args": { + "External id": 513821,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115019.060, "dur": 0.517, + "args": { + "External id": 513822,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145115135.533, "dur": 4.513, + "args": { + "External id": 513823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145115143.257, "dur": 0.589, + "args": { + "External id": 513824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145115145.765, "dur": 0.701, + "args": { + "External id": 513825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145115182.004, "dur": 252.534, + "args": { + "External id": 513826,"Record function id": 0, "Sequence number": 5516344, "Fwd thread id": 1, "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145115183.803, "dur": 244.244, + "args": { + "External id": 513827,"Sequence number": 5516344, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6946 + } + }, + { + "ph": "f", "id": 343, "pid": 2338711, "tid": 2379440, "ts": 6274145115183.803, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145115206.155, "dur": 54.857, + "args": { + "External id": 513828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115210.482, "dur": 4.527, + "args": { + "External id": 513829,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145115216.516, "dur": 43.764, + "args": { + "External id": 513830,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145115271.611, "dur": 5.343, + "args": { + "External id": 513831,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115274.069, "dur": 2.552, + "args": { + "External id": 513832,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145115441.409, "dur": 212.107, + "args": { + "External id": 513833,"Record function id": 0, "Sequence number": 5516343, "Fwd thread id": 1, "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145115443.291, "dur": 200.093, + "args": { + "External id": 513834,"Sequence number": 5516343, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6953 + } + }, + { + "ph": "f", "id": 344, "pid": 2338711, "tid": 2379440, "ts": 6274145115443.291, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145115455.309, "dur": 52.142, + "args": { + "External id": 513835,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115462.265, "dur": 3.232, + "args": { + "External id": 513836,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145115468.689, "dur": 38.079, + "args": { + "External id": 513837,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145115515.252, "dur": 8.084, + "args": { + "External id": 513838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115517.602, "dur": 5.387, + "args": { + "External id": 513839,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115665.692, "dur": 21.929, + "args": { + "External id": 513840,"Record function id": 0, "Sequence number": 5516342, "Fwd thread id": 1, "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115671.032, "dur": 13.059, + "args": { + "External id": 513841,"Sequence number": 5516342, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6960 + } + }, + { + "ph": "f", "id": 345, "pid": 2338711, "tid": 2379440, "ts": 6274145115671.032, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145115675.277, "dur": 8.343, + "args": { + "External id": 513842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145115677.184, "dur": 6.232, + "args": { + "External id": 513843,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115691.264, "dur": 6.180, + "args": { + "External id": 513844,"Record function id": 0, "Sequence number": 5516341, "Fwd thread id": 1, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115692.204, "dur": 3.345, + "args": { + "External id": 513845,"Sequence number": 5516341, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6964 + } + }, + { + "ph": "f", "id": 346, "pid": 2338711, "tid": 2379440, "ts": 6274145115692.204, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145115693.407, "dur": 1.996, + "args": { + "External id": 513846,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145115694.470, "dur": 0.812, + "args": { + "External id": 513847,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115700.712, "dur": 9.296, + "args": { + "External id": 513848,"Record function id": 0, "Sequence number": 5516340, "Fwd thread id": 1, "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115701.597, "dur": 6.645, + "args": { + "External id": 513849,"Sequence number": 5516340, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6968 + } + }, + { + "ph": "f", "id": 347, "pid": 2338711, "tid": 2379440, "ts": 6274145115701.597, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145115703.261, "dur": 4.846, + "args": { + "External id": 513850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145115707.130, "dur": 0.856, + "args": { + "External id": 513851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115714.070, "dur": 5.812, + "args": { + "External id": 513852,"Record function id": 0, "Sequence number": 5516339, "Fwd thread id": 1, "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115715.004, "dur": 2.995, + "args": { + "External id": 513853,"Sequence number": 5516339, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6972 + } + }, + { + "ph": "f", "id": 348, "pid": 2338711, "tid": 2379440, "ts": 6274145115715.004, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145115715.958, "dur": 1.913, + "args": { + "External id": 513854,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145115716.857, "dur": 0.884, + "args": { + "External id": 513855,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115723.709, "dur": 162.957, + "args": { + "External id": 513856,"Record function id": 0, "Sequence number": 5516338, "Fwd thread id": 1, "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115724.612, "dur": 154.207, + "args": { + "External id": 513857,"Sequence number": 5516338, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6976 + } + }, + { + "ph": "f", "id": 349, "pid": 2338711, "tid": 2379440, "ts": 6274145115724.612, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145115728.413, "dur": 9.326, + "args": { + "External id": 513858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145115730.803, "dur": 6.253, + "args": { + "External id": 513859,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115735.310, "dur": 1.508, + "args": { + "External id": 513860,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145115739.343, "dur": 76.140, + "args": { + "External id": 513861,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145115816.809, "dur": 4.785, + "args": { + "External id": 513862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145115817.630, "dur": 3.105, + "args": { + "External id": 513863,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115819.381, "dur": 1.237, + "args": { + "External id": 513864,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145115823.087, "dur": 8.092, + "args": { + "External id": 513865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145115824.193, "dur": 6.525, + "args": { + "External id": 513866,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115827.510, "dur": 3.115, + "args": { + "External id": 513867,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145115831.727, "dur": 46.352, + "args": { + "External id": 513868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115892.052, "dur": 7.842, + "args": { + "External id": 513869,"Record function id": 0, "Sequence number": 5516337, "Fwd thread id": 1, "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115893.078, "dur": 4.785, + "args": { + "External id": 513870,"Sequence number": 5516337, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6989 + } + }, + { + "ph": "f", "id": 350, "pid": 2338711, "tid": 2379440, "ts": 6274145115893.078, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145115894.974, "dur": 2.746, + "args": { + "External id": 513871,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145115896.127, "dur": 1.481, + "args": { + "External id": 513872,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115903.586, "dur": 9.232, + "args": { + "External id": 513873,"Record function id": 0, "Sequence number": 5516336, "Fwd thread id": 1, "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115904.401, "dur": 6.873, + "args": { + "External id": 513874,"Sequence number": 5516336, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6993 + } + }, + { + "ph": "f", "id": 351, "pid": 2338711, "tid": 2379440, "ts": 6274145115904.401, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145115905.334, "dur": 5.718, + "args": { + "External id": 513875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145115905.850, "dur": 4.606, + "args": { + "External id": 513876,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115909.612, "dur": 0.754, + "args": { + "External id": 513877,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145115918.873, "dur": 9.753, + "args": { + "External id": 513878,"Record function id": 0, "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145115920.586, "dur": 7.210, + "args": { + "External id": 513879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145115923.332, "dur": 4.144, + "args": { + "External id": 513880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145115924.710, "dur": 2.629, + "args": { + "External id": 513881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115932.120, "dur": 5.728, + "args": { + "External id": 513882,"Record function id": 0, "Sequence number": 5516335, "Fwd thread id": 1, "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115932.919, "dur": 2.846, + "args": { + "External id": 513883,"Sequence number": 5516335, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7002 + } + }, + { + "ph": "f", "id": 352, "pid": 2338711, "tid": 2379440, "ts": 6274145115932.919, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145115934.099, "dur": 1.520, + "args": { + "External id": 513884,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145115934.546, "dur": 0.927, + "args": { + "External id": 513885,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115941.196, "dur": 156.820, + "args": { + "External id": 513886,"Record function id": 0, "Sequence number": 5516334, "Fwd thread id": 1, "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145115944.384, "dur": 106.915, + "args": { + "External id": 513887,"Sequence number": 5516334, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7006 + } + }, + { + "ph": "f", "id": 353, "pid": 2338711, "tid": 2379440, "ts": 6274145115944.384, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145115946.179, "dur": 2.520, + "args": { + "External id": 513888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145115946.774, "dur": 1.478, + "args": { + "External id": 513889,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115947.710, "dur": 0.396, + "args": { + "External id": 513890,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145115949.270, "dur": 28.537, + "args": { + "External id": 513891,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145115978.692, "dur": 7.601, + "args": { + "External id": 513892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145115979.458, "dur": 6.372, + "args": { + "External id": 513893,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115982.556, "dur": 3.154, + "args": { + "External id": 513894,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145115987.417, "dur": 3.416, + "args": { + "External id": 513895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145115988.892, "dur": 1.477, + "args": { + "External id": 513896,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145115989.978, "dur": 0.322, + "args": { + "External id": 513897,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145115991.659, "dur": 58.803, + "args": { + "External id": 513898,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116105.667, "dur": 39.429, + "args": { + "External id": 513899,"Record function id": 0, "Sequence number": 5516333, "Fwd thread id": 1, "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116106.840, "dur": 7.657, + "args": { + "External id": 513900,"Sequence number": 5516333, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7019 + } + }, + { + "ph": "f", "id": 354, "pid": 2338711, "tid": 2379440, "ts": 6274145116106.840, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145116111.390, "dur": 2.958, + "args": { + "External id": 513901,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145116112.387, "dur": 1.867, + "args": { + "External id": 513902,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145116118.324, "dur": 24.069, + "args": { + "External id": 513903,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116149.226, "dur": 8.173, + "args": { + "External id": 513904,"Record function id": 0, "Sequence number": 5516332, "Fwd thread id": 1, "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116150.334, "dur": 5.360, + "args": { + "External id": 513905,"Sequence number": 5516332, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7024 + } + }, + { + "ph": "f", "id": 355, "pid": 2338711, "tid": 2379440, "ts": 6274145116150.334, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145116151.382, "dur": 4.110, + "args": { + "External id": 513906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145116152.636, "dur": 2.298, + "args": { + "External id": 513907,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116154.361, "dur": 0.438, + "args": { + "External id": 513908,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145116162.086, "dur": 8.733, + "args": { + "External id": 513909,"Record function id": 0, "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145116163.443, "dur": 6.735, + "args": { + "External id": 513910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145116165.046, "dur": 4.680, + "args": { + "External id": 513911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145116167.945, "dur": 1.631, + "args": { + "External id": 513912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116173.935, "dur": 5.586, + "args": { + "External id": 513913,"Record function id": 0, "Sequence number": 5516331, "Fwd thread id": 1, "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116174.859, "dur": 3.107, + "args": { + "External id": 513914,"Sequence number": 5516331, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7033 + } + }, + { + "ph": "f", "id": 356, "pid": 2338711, "tid": 2379440, "ts": 6274145116174.859, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145116176.112, "dur": 1.709, + "args": { + "External id": 513915,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145116176.731, "dur": 0.994, + "args": { + "External id": 513916,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116182.965, "dur": 142.732, + "args": { + "External id": 513917,"Record function id": 0, "Sequence number": 5516330, "Fwd thread id": 1, "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116183.878, "dur": 133.772, + "args": { + "External id": 513918,"Sequence number": 5516330, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7037 + } + }, + { + "ph": "f", "id": 357, "pid": 2338711, "tid": 2379440, "ts": 6274145116183.878, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145116185.960, "dur": 7.746, + "args": { + "External id": 513919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145116189.231, "dur": 4.007, + "args": { + "External id": 513920,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116190.213, "dur": 2.902, + "args": { + "External id": 513921,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145116194.316, "dur": 57.512, + "args": { + "External id": 513922,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145116253.135, "dur": 3.654, + "args": { + "External id": 513923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145116253.635, "dur": 2.612, + "args": { + "External id": 513924,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116255.406, "dur": 0.724, + "args": { + "External id": 513925,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145116259.758, "dur": 4.099, + "args": { + "External id": 513926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145116261.342, "dur": 2.007, + "args": { + "External id": 513927,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116262.779, "dur": 0.487, + "args": { + "External id": 513928,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145116264.249, "dur": 52.725, + "args": { + "External id": 513929,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116330.131, "dur": 27.964, + "args": { + "External id": 513930,"Record function id": 0, "Sequence number": 5516329, "Fwd thread id": 1, "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116331.252, "dur": 4.696, + "args": { + "External id": 513931,"Sequence number": 5516329, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7050 + } + }, + { + "ph": "f", "id": 358, "pid": 2338711, "tid": 2379440, "ts": 6274145116331.252, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145116333.315, "dur": 2.506, + "args": { + "External id": 513932,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145116334.276, "dur": 1.414, + "args": { + "External id": 513933,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145116338.330, "dur": 17.566, + "args": { + "External id": 513934,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116362.022, "dur": 9.949, + "args": { + "External id": 513935,"Record function id": 0, "Sequence number": 5516328, "Fwd thread id": 1, "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116365.563, "dur": 4.431, + "args": { + "External id": 513936,"Sequence number": 5516328, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7055 + } + }, + { + "ph": "f", "id": 359, "pid": 2338711, "tid": 2379440, "ts": 6274145116365.563, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145116366.592, "dur": 3.230, + "args": { + "External id": 513937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145116367.584, "dur": 1.684, + "args": { + "External id": 513938,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116368.702, "dur": 0.418, + "args": { + "External id": 513939,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145116376.363, "dur": 7.399, + "args": { + "External id": 513940,"Record function id": 0, "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145116377.744, "dur": 5.532, + "args": { + "External id": 513941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145116378.778, "dur": 4.031, + "args": { + "External id": 513942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145116379.820, "dur": 2.849, + "args": { + "External id": 513943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145116387.992, "dur": 433.490, + "args": { + "External id": 513944,"Record function id": 0, "Sequence number": 5516327, "Fwd thread id": 1, "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145116389.166, "dur": 395.798, + "args": { + "External id": 513945,"Sequence number": 5516327, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7064 + } + }, + { + "ph": "f", "id": 360, "pid": 2338711, "tid": 2379440, "ts": 6274145116389.166, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145116425.537, "dur": 4.082, + "args": { + "External id": 513946,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145116428.430, "dur": 1.053, + "args": { + "External id": 513947,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145116444.385, "dur": 4.261, + "args": { + "External id": 513948,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145116459.485, "dur": 2.314, + "args": { + "External id": 513949,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145116656.111, "dur": 4.013, + "args": { + "External id": 513950,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145116666.174, "dur": 45.225, + "args": { + "External id": 513951,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116682.681, "dur": 1.377, + "args": { + "External id": 513952,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145116717.812, "dur": 35.179, + "args": { + "External id": 513953,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145116719.953, "dur": 32.847, + "args": { + "External id": 513954,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116726.999, "dur": 4.850, + "args": { + "External id": 513955,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145116733.694, "dur": 18.472, + "args": { + "External id": 513956,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145116761.663, "dur": 3.231, + "args": { + "External id": 513957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145116763.118, "dur": 1.684, + "args": { + "External id": 513958,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145116771.991, "dur": 4.908, + "args": { + "External id": 513959,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145116773.258, "dur": 3.473, + "args": { + "External id": 513960,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145116798.777, "dur": 17.577, + "args": { + "External id": 513961,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145116835.926, "dur": 10.682, + "args": { + "External id": 513962,"Record function id": 0, "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145116838.653, "dur": 7.107, + "args": { + "External id": 513963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145116841.033, "dur": 3.587, + "args": { + "External id": 513964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145116842.515, "dur": 1.998, + "args": { + "External id": 513965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116850.432, "dur": 5.457, + "args": { + "External id": 513966,"Record function id": 0, "Sequence number": 5516326, "Fwd thread id": 1, "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145116851.727, "dur": 1.571, + "args": { + "External id": 513967,"Sequence number": 5516326, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7086 + } + }, + { + "ph": "f", "id": 361, "pid": 2338711, "tid": 2379440, "ts": 6274145116851.727, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145116860.083, "dur": 512.216, + "args": { + "External id": 513968,"Record function id": 0, "Sequence number": 5516325, "Fwd thread id": 1, "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145116861.687, "dur": 497.457, + "args": { + "External id": 513969,"Sequence number": 5516325, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7088 + } + }, + { + "ph": "f", "id": 362, "pid": 2338711, "tid": 2379440, "ts": 6274145116861.687, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145116895.934, "dur": 9.060, + "args": { + "External id": 513970,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145116901.111, "dur": 3.519, + "args": { + "External id": 513971,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145116908.180, "dur": 5.817, + "args": { + "External id": 513972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145116910.190, "dur": 3.111, + "args": { + "External id": 513973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116912.420, "dur": 0.707, + "args": { + "External id": 513974,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2379440, + "ts": 6274145116919.556, "dur": 112.432, + "args": { + "External id": 513975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145116920.392, "dur": 3.288, + "args": { + "External id": 513976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145116921.082, "dur": 2.006, + "args": { + "External id": 513977,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145116922.462, "dur": 0.493, + "args": { + "External id": 513978,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2379440, + "ts": 6274145116924.928, "dur": 106.571, + "args": { + "External id": 513979,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145116926.674, "dur": 103.977, + "args": { + "External id": 513980,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145117036.062, "dur": 5.725, + "args": { + "External id": 513981,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117040.228, "dur": 1.416, + "args": { + "External id": 513982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145117116.427, "dur": 5.800, + "args": { + "External id": 513983,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145117123.935, "dur": 4.780, + "args": { + "External id": 513984,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145117130.281, "dur": 2.301, + "args": { + "External id": 513985,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117170.268, "dur": 3.369, + "args": { + "External id": 513986,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117171.154, "dur": 2.238, + "args": { + "External id": 513987,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338711, "tid": 2379440, + "ts": 6274145117198.234, "dur": 138.510, + "args": { + "External id": 513988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145117203.636, "dur": 7.037, + "args": { + "External id": 513989,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117208.645, "dur": 1.015, + "args": { + "External id": 513990,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145117212.435, "dur": 6.272, + "args": { + "External id": 513991,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117217.348, "dur": 0.639, + "args": { + "External id": 513992,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2379440, + "ts": 6274145117220.154, "dur": 2.422, + "args": { + "External id": 513993,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117221.873, "dur": 0.364, + "args": { + "External id": 513994,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145117223.601, "dur": 2.991, + "args": { + "External id": 513995,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117225.649, "dur": 0.401, + "args": { + "External id": 513996,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145117230.276, "dur": 3.917, + "args": { + "External id": 513997,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117233.236, "dur": 0.632, + "args": { + "External id": 513998,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117235.727, "dur": 8.306, + "args": { + "External id": 513999,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145117240.096, "dur": 3.767, + "args": { + "External id": 514000,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145117247.516, "dur": 2.817, + "args": { + "External id": 514001,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117249.518, "dur": 0.485, + "args": { + "External id": 514002,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117251.883, "dur": 3.180, + "args": { + "External id": 514003,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117253.476, "dur": 1.488, + "args": { + "External id": 514004,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145117256.387, "dur": 67.226, + "args": { + "External id": 514005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117325.876, "dur": 1.319, + "args": { + "External id": 514006,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338711, "tid": 2379440, + "ts": 6274145117328.435, "dur": 3.694, + "args": { + "External id": 514007,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117330.570, "dur": 0.732, + "args": { + "External id": 514008,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117334.872, "dur": 0.731, + "args": { + "External id": 514009,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145117385.068, "dur": 8.993, + "args": { + "External id": 514010,"Record function id": 0, "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145117387.181, "dur": 6.159, + "args": { + "External id": 514011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145117389.152, "dur": 3.321, + "args": { + "External id": 514012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145117390.527, "dur": 1.833, + "args": { + "External id": 514013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117398.053, "dur": 8.069, + "args": { + "External id": 514014,"Record function id": 0, "Sequence number": 5516324, "Fwd thread id": 1, "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117399.528, "dur": 4.210, + "args": { + "External id": 514015,"Sequence number": 5516324, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7134 + } + }, + { + "ph": "f", "id": 363, "pid": 2338711, "tid": 2379440, "ts": 6274145117399.528, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117401.478, "dur": 2.046, + "args": { + "External id": 514016,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117402.565, "dur": 0.835, + "args": { + "External id": 514017,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117409.869, "dur": 115.495, + "args": { + "External id": 514018,"Record function id": 0, "Sequence number": 5516323, "Fwd thread id": 1, "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117410.835, "dur": 107.958, + "args": { + "External id": 514019,"Sequence number": 5516323, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7138 + } + }, + { + "ph": "f", "id": 364, "pid": 2338711, "tid": 2379440, "ts": 6274145117410.835, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117416.680, "dur": 6.716, + "args": { + "External id": 514020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117418.091, "dur": 4.699, + "args": { + "External id": 514021,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117419.963, "dur": 2.641, + "args": { + "External id": 514022,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145117424.586, "dur": 43.591, + "args": { + "External id": 514023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117469.358, "dur": 6.880, + "args": { + "External id": 514024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117470.172, "dur": 5.431, + "args": { + "External id": 514025,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117474.356, "dur": 1.062, + "args": { + "External id": 514026,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117477.600, "dur": 3.737, + "args": { + "External id": 514027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117478.798, "dur": 2.069, + "args": { + "External id": 514028,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117480.227, "dur": 0.578, + "args": { + "External id": 514029,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145117481.976, "dur": 36.080, + "args": { + "External id": 514030,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117530.491, "dur": 7.648, + "args": { + "External id": 514031,"Record function id": 0, "Sequence number": 5516322, "Fwd thread id": 1, "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117531.539, "dur": 5.281, + "args": { + "External id": 514032,"Sequence number": 5516322, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7151 + } + }, + { + "ph": "f", "id": 365, "pid": 2338711, "tid": 2379440, "ts": 6274145117531.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117533.832, "dur": 2.841, + "args": { + "External id": 514033,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117535.033, "dur": 1.491, + "args": { + "External id": 514034,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117541.585, "dur": 9.760, + "args": { + "External id": 514035,"Record function id": 0, "Sequence number": 5516321, "Fwd thread id": 1, "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117544.873, "dur": 4.619, + "args": { + "External id": 514036,"Sequence number": 5516321, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7155 + } + }, + { + "ph": "f", "id": 366, "pid": 2338711, "tid": 2379440, "ts": 6274145117544.873, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117545.846, "dur": 3.439, + "args": { + "External id": 514037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117546.531, "dur": 2.222, + "args": { + "External id": 514038,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117548.132, "dur": 0.491, + "args": { + "External id": 514039,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145117555.262, "dur": 8.507, + "args": { + "External id": 514040,"Record function id": 0, "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145117557.139, "dur": 6.099, + "args": { + "External id": 514041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145117558.376, "dur": 4.591, + "args": { + "External id": 514042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145117558.923, "dur": 3.960, + "args": { + "External id": 514043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117566.803, "dur": 7.755, + "args": { + "External id": 514044,"Record function id": 0, "Sequence number": 5516320, "Fwd thread id": 1, "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117567.939, "dur": 4.883, + "args": { + "External id": 514045,"Sequence number": 5516320, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7164 + } + }, + { + "ph": "f", "id": 367, "pid": 2338711, "tid": 2379440, "ts": 6274145117567.939, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117568.963, "dur": 3.730, + "args": { + "External id": 514046,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117571.582, "dur": 1.013, + "args": { + "External id": 514047,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117577.565, "dur": 168.575, + "args": { + "External id": 514048,"Record function id": 0, "Sequence number": 5516319, "Fwd thread id": 1, "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117628.366, "dur": 107.514, + "args": { + "External id": 514049,"Sequence number": 5516319, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7168 + } + }, + { + "ph": "f", "id": 368, "pid": 2338711, "tid": 2379440, "ts": 6274145117628.366, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117632.672, "dur": 6.064, + "args": { + "External id": 514050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117633.723, "dur": 4.169, + "args": { + "External id": 514051,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117636.554, "dur": 0.972, + "args": { + "External id": 514052,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145117642.251, "dur": 45.446, + "args": { + "External id": 514053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117689.100, "dur": 5.084, + "args": { + "External id": 514054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117689.837, "dur": 3.706, + "args": { + "External id": 514055,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117691.262, "dur": 2.121, + "args": { + "External id": 514056,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117695.465, "dur": 5.078, + "args": { + "External id": 514057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117696.221, "dur": 3.872, + "args": { + "External id": 514058,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117699.583, "dur": 0.406, + "args": { + "External id": 514059,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145117700.940, "dur": 34.166, + "args": { + "External id": 514060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117753.828, "dur": 35.919, + "args": { + "External id": 514061,"Record function id": 0, "Sequence number": 5516318, "Fwd thread id": 1, "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117754.891, "dur": 4.442, + "args": { + "External id": 514062,"Sequence number": 5516318, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7181 + } + }, + { + "ph": "f", "id": 369, "pid": 2338711, "tid": 2379440, "ts": 6274145117754.891, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117756.813, "dur": 2.393, + "args": { + "External id": 514063,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117757.731, "dur": 1.330, + "args": { + "External id": 514064,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145117762.632, "dur": 24.082, + "args": { + "External id": 514065,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117793.441, "dur": 9.931, + "args": { + "External id": 514066,"Record function id": 0, "Sequence number": 5516317, "Fwd thread id": 1, "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145117794.330, "dur": 7.070, + "args": { + "External id": 514067,"Sequence number": 5516317, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7186 + } + }, + { + "ph": "f", "id": 370, "pid": 2338711, "tid": 2379440, "ts": 6274145117794.330, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145117795.205, "dur": 6.009, + "args": { + "External id": 514068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145117798.554, "dur": 2.061, + "args": { + "External id": 514069,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145117799.948, "dur": 0.546, + "args": { + "External id": 514070,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145117807.699, "dur": 5.870, + "args": { + "External id": 514071,"Record function id": 0, "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145117809.308, "dur": 3.673, + "args": { + "External id": 514072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145117810.510, "dur": 2.101, + "args": { + "External id": 514073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145117811.029, "dur": 1.493, + "args": { + "External id": 514074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145117817.495, "dur": 486.530, + "args": { + "External id": 514075,"Record function id": 0, "Sequence number": 5516316, "Fwd thread id": 1, "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145117822.260, "dur": 444.735, + "args": { + "External id": 514076,"Sequence number": 5516316, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 7195 + } + }, + { + "ph": "f", "id": 371, "pid": 2338711, "tid": 2379440, "ts": 6274145117822.260, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2379440, + "ts": 6274145117851.123, "dur": 33.455, + "args": { + "External id": 514077,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145117852.858, "dur": 31.518, + "args": { + "External id": 514078,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145117856.001, "dur": 6.697, + "args": { + "External id": 514079,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145117858.728, "dur": 3.390, + "args": { + "External id": 514080,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145117864.120, "dur": 19.803, + "args": { + "External id": 514081,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117897.292, "dur": 4.506, + "args": { + "External id": 514082,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117900.369, "dur": 1.272, + "args": { + "External id": 514083,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145117905.932, "dur": 1.623, + "args": { + "External id": 514084,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145117906.716, "dur": 0.732, + "args": { + "External id": 514085,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145117920.318, "dur": 2.577, + "args": { + "External id": 514086,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145117936.925, "dur": 2.634, + "args": { + "External id": 514087,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118153.661, "dur": 6.819, + "args": { + "External id": 514088,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145118165.207, "dur": 33.873, + "args": { + "External id": 514089,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118175.860, "dur": 1.085, + "args": { + "External id": 514090,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145118205.152, "dur": 27.818, + "args": { + "External id": 514091,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145118206.855, "dur": 25.906, + "args": { + "External id": 514092,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118211.278, "dur": 4.391, + "args": { + "External id": 514093,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145118217.116, "dur": 15.118, + "args": { + "External id": 514094,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145118240.204, "dur": 2.496, + "args": { + "External id": 514095,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118241.550, "dur": 1.014, + "args": { + "External id": 514096,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145118250.238, "dur": 2.916, + "args": { + "External id": 514097,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118251.695, "dur": 1.347, + "args": { + "External id": 514098,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145118255.394, "dur": 2.049, + "args": { + "External id": 514099,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118256.628, "dur": 0.721, + "args": { + "External id": 514100,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145118284.776, "dur": 17.661, + "args": { + "External id": 514101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145118319.288, "dur": 8.850, + "args": { + "External id": 514102,"Record function id": 0, "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145118321.338, "dur": 6.154, + "args": { + "External id": 514103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145118323.795, "dur": 2.801, + "args": { + "External id": 514104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145118324.898, "dur": 1.610, + "args": { + "External id": 514105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118331.968, "dur": 7.893, + "args": { + "External id": 514106,"Record function id": 0, "Sequence number": 5516315, "Fwd thread id": 1, "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118332.852, "dur": 4.277, + "args": { + "External id": 514107,"Sequence number": 5516315, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7226 + } + }, + { + "ph": "f", "id": 372, "pid": 2338711, "tid": 2379440, "ts": 6274145118332.852, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145118334.935, "dur": 1.940, + "args": { + "External id": 514108,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118335.643, "dur": 1.111, + "args": { + "External id": 514109,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118343.445, "dur": 145.853, + "args": { + "External id": 514110,"Record function id": 0, "Sequence number": 5516314, "Fwd thread id": 1, "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118344.248, "dur": 138.632, + "args": { + "External id": 514111,"Sequence number": 5516314, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7230 + } + }, + { + "ph": "f", "id": 373, "pid": 2338711, "tid": 2379440, "ts": 6274145118344.248, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145118349.390, "dur": 4.955, + "args": { + "External id": 514112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145118350.898, "dur": 2.907, + "args": { + "External id": 514113,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118352.762, "dur": 0.815, + "args": { + "External id": 514114,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145118355.704, "dur": 68.922, + "args": { + "External id": 514115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145118429.305, "dur": 5.853, + "args": { + "External id": 514116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145118430.162, "dur": 4.402, + "args": { + "External id": 514117,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118431.746, "dur": 2.673, + "args": { + "External id": 514118,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145118439.018, "dur": 5.590, + "args": { + "External id": 514119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145118440.019, "dur": 4.124, + "args": { + "External id": 514120,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118441.413, "dur": 2.642, + "args": { + "External id": 514121,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145118445.130, "dur": 37.013, + "args": { + "External id": 514122,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118494.244, "dur": 9.660, + "args": { + "External id": 514123,"Record function id": 0, "Sequence number": 5516313, "Fwd thread id": 1, "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118495.206, "dur": 6.494, + "args": { + "External id": 514124,"Sequence number": 5516313, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7243 + } + }, + { + "ph": "f", "id": 374, "pid": 2338711, "tid": 2379440, "ts": 6274145118495.206, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145118496.970, "dur": 4.589, + "args": { + "External id": 514125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118500.221, "dur": 1.200, + "args": { + "External id": 514126,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118507.442, "dur": 7.830, + "args": { + "External id": 514127,"Record function id": 0, "Sequence number": 5516312, "Fwd thread id": 1, "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118508.517, "dur": 4.749, + "args": { + "External id": 514128,"Sequence number": 5516312, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7247 + } + }, + { + "ph": "f", "id": 375, "pid": 2338711, "tid": 2379440, "ts": 6274145118508.517, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145118509.517, "dur": 3.537, + "args": { + "External id": 514129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145118510.146, "dur": 2.442, + "args": { + "External id": 514130,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118511.476, "dur": 0.974, + "args": { + "External id": 514131,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145118519.314, "dur": 5.230, + "args": { + "External id": 514132,"Record function id": 0, "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145118520.922, "dur": 3.153, + "args": { + "External id": 514133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145118521.884, "dur": 1.900, + "args": { + "External id": 514134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145118522.588, "dur": 1.076, + "args": { + "External id": 514135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118527.791, "dur": 8.438, + "args": { + "External id": 514136,"Record function id": 0, "Sequence number": 5516311, "Fwd thread id": 1, "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145118530.860, "dur": 3.630, + "args": { + "External id": 514137,"Sequence number": 5516311, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7256 + } + }, + { + "ph": "f", "id": 376, "pid": 2338711, "tid": 2379440, "ts": 6274145118530.860, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145118531.780, "dur": 2.570, + "args": { + "External id": 514138,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118533.028, "dur": 1.192, + "args": { + "External id": 514139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145118540.404, "dur": 404.289, + "args": { + "External id": 514140,"Record function id": 0, "Sequence number": 5516310, "Fwd thread id": 1, "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145118541.838, "dur": 382.193, + "args": { + "External id": 514141,"Sequence number": 5516310, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7260 + } + }, + { + "ph": "f", "id": 377, "pid": 2338711, "tid": 2379440, "ts": 6274145118541.838, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145118556.825, "dur": 6.356, + "args": { + "External id": 514142,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118559.682, "dur": 3.068, + "args": { + "External id": 514143,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145118565.126, "dur": 4.255, + "args": { + "External id": 514144,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118567.104, "dur": 2.070, + "args": { + "External id": 514145,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145118571.004, "dur": 5.688, + "args": { + "External id": 514146,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118572.742, "dur": 3.757, + "args": { + "External id": 514147,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145118625.733, "dur": 270.162, + "args": { + "External id": 514148,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145118725.155, "dur": 5.578, + "args": { + "External id": 514149,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145118732.826, "dur": 2.925, + "args": { + "External id": 514150,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145118739.009, "dur": 2.019, + "args": { + "External id": 514151,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145118742.210, "dur": 2.373, + "args": { + "External id": 514152,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145118796.022, "dur": 3.383, + "args": { + "External id": 514153,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118797.378, "dur": 1.939, + "args": { + "External id": 514154,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145118801.445, "dur": 28.791, + "args": { + "External id": 514155,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118806.802, "dur": 2.071, + "args": { + "External id": 514156,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145118831.581, "dur": 1.752, + "args": { + "External id": 514157,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145118832.837, "dur": 0.419, + "args": { + "External id": 514158,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145118834.618, "dur": 14.466, + "args": { + "External id": 514159,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118837.076, "dur": 0.488, + "args": { + "External id": 514160,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145118910.597, "dur": 4.327, + "args": { + "External id": 514161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145118917.852, "dur": 0.689, + "args": { + "External id": 514162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2379440, + "ts": 6274145118920.547, "dur": 0.523, + "args": { + "External id": 514163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145118957.917, "dur": 284.588, + "args": { + "External id": 514164,"Record function id": 0, "Sequence number": 5516309, "Fwd thread id": 1, "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145118959.838, "dur": 273.237, + "args": { + "External id": 514165,"Sequence number": 5516309, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7284 + } + }, + { + "ph": "f", "id": 378, "pid": 2338711, "tid": 2379440, "ts": 6274145118959.838, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145118981.885, "dur": 47.557, + "args": { + "External id": 514166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145118985.530, "dur": 6.036, + "args": { + "External id": 514167,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145118993.247, "dur": 35.584, + "args": { + "External id": 514168,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145119040.156, "dur": 5.447, + "args": { + "External id": 514169,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119042.955, "dur": 2.220, + "args": { + "External id": 514170,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145119252.959, "dur": 166.081, + "args": { + "External id": 514171,"Record function id": 0, "Sequence number": 5516308, "Fwd thread id": 1, "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145119255.255, "dur": 157.670, + "args": { + "External id": 514172,"Sequence number": 5516308, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7291 + } + }, + { + "ph": "f", "id": 379, "pid": 2338711, "tid": 2379440, "ts": 6274145119255.255, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2379440, + "ts": 6274145119268.482, "dur": 41.889, + "args": { + "External id": 514173,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119271.514, "dur": 3.489, + "args": { + "External id": 514174,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145119276.086, "dur": 33.634, + "args": { + "External id": 514175,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2379440, + "ts": 6274145119318.495, "dur": 5.083, + "args": { + "External id": 514176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119320.633, "dur": 2.552, + "args": { + "External id": 514177,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119428.240, "dur": 13.469, + "args": { + "External id": 514178,"Record function id": 0, "Sequence number": 5516307, "Fwd thread id": 1, "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119430.057, "dur": 8.987, + "args": { + "External id": 514179,"Sequence number": 5516307, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7298 + } + }, + { + "ph": "f", "id": 380, "pid": 2338711, "tid": 2379440, "ts": 6274145119430.057, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119433.087, "dur": 5.600, + "args": { + "External id": 514180,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119434.627, "dur": 3.896, + "args": { + "External id": 514181,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119445.717, "dur": 7.085, + "args": { + "External id": 514182,"Record function id": 0, "Sequence number": 5516306, "Fwd thread id": 1, "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119446.778, "dur": 4.126, + "args": { + "External id": 514183,"Sequence number": 5516306, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7302 + } + }, + { + "ph": "f", "id": 381, "pid": 2338711, "tid": 2379440, "ts": 6274145119446.778, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119448.278, "dur": 2.469, + "args": { + "External id": 514184,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119449.756, "dur": 0.828, + "args": { + "External id": 514185,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119455.747, "dur": 7.908, + "args": { + "External id": 514186,"Record function id": 0, "Sequence number": 5516305, "Fwd thread id": 1, "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119456.619, "dur": 5.341, + "args": { + "External id": 514187,"Sequence number": 5516305, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7306 + } + }, + { + "ph": "f", "id": 382, "pid": 2338711, "tid": 2379440, "ts": 6274145119456.619, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119460.206, "dur": 1.605, + "args": { + "External id": 514188,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119461.009, "dur": 0.705, + "args": { + "External id": 514189,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119467.381, "dur": 6.105, + "args": { + "External id": 514190,"Record function id": 0, "Sequence number": 5516304, "Fwd thread id": 1, "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119468.504, "dur": 3.130, + "args": { + "External id": 514191,"Sequence number": 5516304, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7310 + } + }, + { + "ph": "f", "id": 383, "pid": 2338711, "tid": 2379440, "ts": 6274145119468.504, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119469.477, "dur": 1.994, + "args": { + "External id": 514192,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119470.481, "dur": 0.891, + "args": { + "External id": 514193,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119476.741, "dur": 202.179, + "args": { + "External id": 514194,"Record function id": 0, "Sequence number": 5516303, "Fwd thread id": 1, "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119477.560, "dur": 188.240, + "args": { + "External id": 514195,"Sequence number": 5516303, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7314 + } + }, + { + "ph": "f", "id": 384, "pid": 2338711, "tid": 2379440, "ts": 6274145119477.560, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119481.798, "dur": 11.433, + "args": { + "External id": 514196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119486.129, "dur": 6.279, + "args": { + "External id": 514197,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119488.405, "dur": 3.705, + "args": { + "External id": 514198,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145119495.108, "dur": 72.408, + "args": { + "External id": 514199,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119568.817, "dur": 4.385, + "args": { + "External id": 514200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119569.598, "dur": 2.857, + "args": { + "External id": 514201,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119571.462, "dur": 0.823, + "args": { + "External id": 514202,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119577.078, "dur": 3.197, + "args": { + "External id": 514203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119578.260, "dur": 1.538, + "args": { + "External id": 514204,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119579.316, "dur": 0.395, + "args": { + "External id": 514205,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145119580.915, "dur": 83.072, + "args": { + "External id": 514206,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119688.357, "dur": 9.171, + "args": { + "External id": 514207,"Record function id": 0, "Sequence number": 5516302, "Fwd thread id": 1, "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119689.689, "dur": 6.088, + "args": { + "External id": 514208,"Sequence number": 5516302, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7327 + } + }, + { + "ph": "f", "id": 385, "pid": 2338711, "tid": 2379440, "ts": 6274145119689.689, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119692.175, "dur": 3.423, + "args": { + "External id": 514209,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119693.166, "dur": 2.261, + "args": { + "External id": 514210,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119700.811, "dur": 11.305, + "args": { + "External id": 514211,"Record function id": 0, "Sequence number": 5516301, "Fwd thread id": 1, "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119704.336, "dur": 5.518, + "args": { + "External id": 514212,"Sequence number": 5516301, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7331 + } + }, + { + "ph": "f", "id": 386, "pid": 2338711, "tid": 2379440, "ts": 6274145119704.336, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119705.298, "dur": 4.353, + "args": { + "External id": 514213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119706.478, "dur": 2.655, + "args": { + "External id": 514214,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119708.417, "dur": 0.595, + "args": { + "External id": 514215,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145119718.103, "dur": 12.620, + "args": { + "External id": 514216,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145119719.847, "dur": 9.949, + "args": { + "External id": 514217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145119722.550, "dur": 6.828, + "args": { + "External id": 514218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145119723.786, "dur": 5.513, + "args": { + "External id": 514219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119734.078, "dur": 8.494, + "args": { + "External id": 514220,"Record function id": 0, "Sequence number": 5516300, "Fwd thread id": 1, "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119735.291, "dur": 5.176, + "args": { + "External id": 514221,"Sequence number": 5516300, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7340 + } + }, + { + "ph": "f", "id": 387, "pid": 2338711, "tid": 2379440, "ts": 6274145119735.291, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119736.497, "dur": 3.799, + "args": { + "External id": 514222,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119738.954, "dur": 1.240, + "args": { + "External id": 514223,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119745.632, "dur": 98.988, + "args": { + "External id": 514224,"Record function id": 0, "Sequence number": 5516299, "Fwd thread id": 1, "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119746.503, "dur": 90.529, + "args": { + "External id": 514225,"Sequence number": 5516299, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7344 + } + }, + { + "ph": "f", "id": 388, "pid": 2338711, "tid": 2379440, "ts": 6274145119746.503, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119748.790, "dur": 2.835, + "args": { + "External id": 514226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119749.316, "dur": 1.842, + "args": { + "External id": 514227,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119750.432, "dur": 0.619, + "args": { + "External id": 514228,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145119754.938, "dur": 32.947, + "args": { + "External id": 514229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119789.349, "dur": 3.860, + "args": { + "External id": 514230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119789.955, "dur": 2.742, + "args": { + "External id": 514231,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119791.367, "dur": 1.207, + "args": { + "External id": 514232,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119794.497, "dur": 5.247, + "args": { + "External id": 514233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119795.713, "dur": 3.587, + "args": { + "External id": 514234,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119798.766, "dur": 0.442, + "args": { + "External id": 514235,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145119800.414, "dur": 35.877, + "args": { + "External id": 514236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119849.372, "dur": 48.749, + "args": { + "External id": 514237,"Record function id": 0, "Sequence number": 5516298, "Fwd thread id": 1, "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119858.346, "dur": 8.026, + "args": { + "External id": 514238,"Sequence number": 5516298, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7357 + } + }, + { + "ph": "f", "id": 389, "pid": 2338711, "tid": 2379440, "ts": 6274145119858.346, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119863.788, "dur": 2.432, + "args": { + "External id": 514239,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119864.932, "dur": 1.152, + "args": { + "External id": 514240,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2379440, + "ts": 6274145119869.715, "dur": 25.608, + "args": { + "External id": 514241,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119901.957, "dur": 10.254, + "args": { + "External id": 514242,"Record function id": 0, "Sequence number": 5516297, "Fwd thread id": 1, "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119903.130, "dur": 7.206, + "args": { + "External id": 514243,"Sequence number": 5516297, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7362 + } + }, + { + "ph": "f", "id": 390, "pid": 2338711, "tid": 2379440, "ts": 6274145119903.130, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119906.481, "dur": 3.631, + "args": { + "External id": 514244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119907.486, "dur": 2.040, + "args": { + "External id": 514245,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119908.871, "dur": 0.554, + "args": { + "External id": 514246,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145119916.243, "dur": 6.343, + "args": { + "External id": 514247,"Record function id": 0, "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145119917.903, "dur": 4.192, + "args": { + "External id": 514248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145119918.971, "dur": 2.877, + "args": { + "External id": 514249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145119920.129, "dur": 1.589, + "args": { + "External id": 514250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119925.858, "dur": 8.285, + "args": { + "External id": 514251,"Record function id": 0, "Sequence number": 5516296, "Fwd thread id": 1, "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119927.074, "dur": 5.311, + "args": { + "External id": 514252,"Sequence number": 5516296, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7371 + } + }, + { + "ph": "f", "id": 391, "pid": 2338711, "tid": 2379440, "ts": 6274145119927.074, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145119928.117, "dur": 4.097, + "args": { + "External id": 514253,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145119930.873, "dur": 1.230, + "args": { + "External id": 514254,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119937.338, "dur": 98.427, + "args": { + "External id": 514255,"Record function id": 0, "Sequence number": 5516295, "Fwd thread id": 1, "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145119938.334, "dur": 91.268, + "args": { + "External id": 514256,"Sequence number": 5516295, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7375 + } + }, + { + "ph": "f", "id": 392, "pid": 2338711, "tid": 2379440, "ts": 6274145119938.334, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119940.196, "dur": 3.121, + "args": { + "External id": 514257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119940.942, "dur": 1.965, + "args": { + "External id": 514258,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119942.140, "dur": 0.654, + "args": { + "External id": 514259,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145119944.162, "dur": 35.603, + "args": { + "External id": 514260,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119983.332, "dur": 3.628, + "args": { + "External id": 514261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119984.073, "dur": 2.341, + "args": { + "External id": 514262,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119985.665, "dur": 0.638, + "args": { + "External id": 514263,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145119988.336, "dur": 3.360, + "args": { + "External id": 514264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145119989.171, "dur": 2.111, + "args": { + "External id": 514265,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145119990.747, "dur": 0.447, + "args": { + "External id": 514266,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145119994.330, "dur": 34.544, + "args": { + "External id": 514267,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145120040.552, "dur": 65.553, + "args": { + "External id": 514268,"Record function id": 0, "Sequence number": 5516294, "Fwd thread id": 1, "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145120041.671, "dur": 3.913, + "args": { + "External id": 514269,"Sequence number": 5516294, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7388 + } + }, + { + "ph": "f", "id": 393, "pid": 2338711, "tid": 2379440, "ts": 6274145120041.671, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145120043.321, "dur": 2.116, + "args": { + "External id": 514270,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145120044.134, "dur": 1.170, + "args": { + "External id": 514271,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145120048.331, "dur": 53.844, + "args": { + "External id": 514272,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145120112.325, "dur": 11.694, + "args": { + "External id": 514273,"Record function id": 0, "Sequence number": 5516293, "Fwd thread id": 1, "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338711, "tid": 2379440, + "ts": 6274145120113.632, "dur": 8.120, + "args": { + "External id": 514274,"Sequence number": 5516293, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7393 + } + }, + { + "ph": "f", "id": 394, "pid": 2338711, "tid": 2379440, "ts": 6274145120113.632, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2379440, + "ts": 6274145120114.870, "dur": 6.643, + "args": { + "External id": 514275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2379440, + "ts": 6274145120116.090, "dur": 4.598, + "args": { + "External id": 514276,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120119.740, "dur": 0.820, + "args": { + "External id": 514277,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145120128.346, "dur": 6.361, + "args": { + "External id": 514278,"Record function id": 0, "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145120129.950, "dur": 4.249, + "args": { + "External id": 514279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145120131.478, "dur": 2.224, + "args": { + "External id": 514280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145120132.014, "dur": 1.546, + "args": { + "External id": 514281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145120139.059, "dur": 393.129, + "args": { + "External id": 514282,"Record function id": 0, "Sequence number": 5516292, "Fwd thread id": 1, "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145120140.254, "dur": 358.057, + "args": { + "External id": 514283,"Sequence number": 5516292, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7402 + } + }, + { + "ph": "f", "id": 395, "pid": 2338711, "tid": 2379440, "ts": 6274145120140.254, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145120181.433, "dur": 2.498, + "args": { + "External id": 514284,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145120182.347, "dur": 1.433, + "args": { + "External id": 514285,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145120201.702, "dur": 4.640, + "args": { + "External id": 514286,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145120216.495, "dur": 2.565, + "args": { + "External id": 514287,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145120384.275, "dur": 1.590, + "args": { + "External id": 514288,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145120392.622, "dur": 41.811, + "args": { + "External id": 514289,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120406.014, "dur": 1.112, + "args": { + "External id": 514290,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145120440.462, "dur": 36.657, + "args": { + "External id": 514291,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145120442.328, "dur": 34.591, + "args": { + "External id": 514292,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120446.739, "dur": 5.057, + "args": { + "External id": 514293,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145120455.950, "dur": 20.421, + "args": { + "External id": 514294,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2379440, + "ts": 6274145120481.687, "dur": 2.325, + "args": { + "External id": 514295,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145120482.805, "dur": 1.069, + "args": { + "External id": 514296,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145120490.073, "dur": 1.873, + "args": { + "External id": 514297,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145120490.999, "dur": 0.860, + "args": { + "External id": 514298,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145120512.527, "dur": 14.996, + "args": { + "External id": 514299,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145120541.336, "dur": 7.554, + "args": { + "External id": 514300,"Record function id": 0, "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145120543.647, "dur": 4.514, + "args": { + "External id": 514301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145120545.348, "dur": 1.960, + "args": { + "External id": 514302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145120546.031, "dur": 1.164, + "args": { + "External id": 514303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145120552.483, "dur": 2966.120, + "args": { + "External id": 514304,"Record function id": 0, "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274145120605.599, "dur": 977.306, + "args": { + "External id": 514305,"Record function id": 0, "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338711, "tid": 2379440, + "ts": 6274145120638.777, "dur": 935.477, + "args": { + "External id": 514306,"Record function id": 0, "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274145120654.290, "dur": 904.206, + "args": { + "External id": 514307,"Record function id": 0, "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145120737.661, "dur": 9.167, + "args": { + "External id": 514308,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145120763.565, "dur": 33.313, + "args": { + "External id": 514309,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120768.201, "dur": 1.614, + "args": { + "External id": 514310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120771.966, "dur": 0.547, + "args": { + "External id": 514311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120775.368, "dur": 0.378, + "args": { + "External id": 514312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120777.169, "dur": 0.423, + "args": { + "External id": 514313,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120778.788, "dur": 2.803, + "args": { + "External id": 514314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120784.906, "dur": 0.404, + "args": { + "External id": 514315,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120786.799, "dur": 0.369, + "args": { + "External id": 514316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120788.891, "dur": 1.622, + "args": { + "External id": 514317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120791.640, "dur": 0.558, + "args": { + "External id": 514318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145120807.772, "dur": 48.539, + "args": { + "External id": 514319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145120896.044, "dur": 117.823, + "args": { + "External id": 514320,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145120906.944, "dur": 4.217, + "args": { + "External id": 514321,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145120916.461, "dur": 10.623, + "args": { + "External id": 514322,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145120921.108, "dur": 5.586, + "args": { + "External id": 514323,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120924.553, "dur": 0.662, + "args": { + "External id": 514324,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145120933.673, "dur": 27.347, + "args": { + "External id": 514325,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120935.779, "dur": 0.652, + "args": { + "External id": 514326,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120938.194, "dur": 4.070, + "args": { + "External id": 514327,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120943.195, "dur": 0.502, + "args": { + "External id": 514328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120945.352, "dur": 0.416, + "args": { + "External id": 514329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120948.314, "dur": 0.303, + "args": { + "External id": 514330,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120950.286, "dur": 0.422, + "args": { + "External id": 514331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120951.835, "dur": 0.387, + "args": { + "External id": 514332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120954.926, "dur": 0.443, + "args": { + "External id": 514333,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145120956.615, "dur": 0.626, + "args": { + "External id": 514334,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145120973.233, "dur": 30.834, + "args": { + "External id": 514335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145121109.761, "dur": 337.561, + "args": { + "External id": 514336,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145121145.069, "dur": 296.953, + "args": { + "External id": 514337,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7456, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145121156.827, "dur": 279.427, + "args": { + "External id": 514338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145121472.600, "dur": 2.204, + "args": { + "External id": 514339,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7458, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145121612.094, "dur": 1882.099, + "args": { + "External id": 514340,"Sequence number": 5516291, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7459 + } + }, + { + "ph": "f", "id": 396, "pid": 2338711, "tid": 2379440, "ts": 6274145121612.094, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145121744.332, "dur": 113.137, + "args": { + "External id": 514341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145121904.304, "dur": 40.033, + "args": { + "External id": 514342,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145121964.709, "dur": 47.081, + "args": { + "External id": 514343,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145122021.569, "dur": 31.728, + "args": { + "External id": 514344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145122100.019, "dur": 46.563, + "args": { + "External id": 514345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145122154.870, "dur": 29.182, + "args": { + "External id": 514346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145122193.829, "dur": 30.619, + "args": { + "External id": 514347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145122252.617, "dur": 25.475, + "args": { + "External id": 514348,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145122297.407, "dur": 29.849, + "args": { + "External id": 514349,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145122354.440, "dur": 17.267, + "args": { + "External id": 514350,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145122384.844, "dur": 15.026, + "args": { + "External id": 514351,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145122411.291, "dur": 39.752, + "args": { + "External id": 514352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145122454.745, "dur": 32.946, + "args": { + "External id": 514353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145122519.483, "dur": 306.316, + "args": { + "External id": 514354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145122628.858, "dur": 10.388, + "args": { + "External id": 514355,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145122642.283, "dur": 4.891, + "args": { + "External id": 514356,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145122648.650, "dur": 3.167, + "args": { + "External id": 514357,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145122653.513, "dur": 4.813, + "args": { + "External id": 514358,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145122711.233, "dur": 6.631, + "args": { + "External id": 514359,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145122713.952, "dur": 3.647, + "args": { + "External id": 514360,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145122720.320, "dur": 32.734, + "args": { + "External id": 514361,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145122726.723, "dur": 1.699, + "args": { + "External id": 514362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145122757.568, "dur": 1.818, + "args": { + "External id": 514363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145122758.891, "dur": 0.426, + "args": { + "External id": 514364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145122760.148, "dur": 16.333, + "args": { + "External id": 514365,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145122762.490, "dur": 1.654, + "args": { + "External id": 514366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145122866.461, "dur": 30.409, + "args": { + "External id": 514367,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145122917.331, "dur": 17.446, + "args": { + "External id": 514368,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145122943.033, "dur": 54.103, + "args": { + "External id": 514369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145123003.925, "dur": 41.261, + "args": { + "External id": 514370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145123055.317, "dur": 67.892, + "args": { + "External id": 514371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145123132.795, "dur": 32.167, + "args": { + "External id": 514372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145123173.037, "dur": 28.005, + "args": { + "External id": 514373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145123207.251, "dur": 29.680, + "args": { + "External id": 514374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145123261.605, "dur": 25.132, + "args": { + "External id": 514375,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145123306.899, "dur": 23.994, + "args": { + "External id": 514376,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145123348.079, "dur": 16.772, + "args": { + "External id": 514377,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145123383.757, "dur": 28.312, + "args": { + "External id": 514378,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145123436.730, "dur": 22.901, + "args": { + "External id": 514379,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123542.381, "dur": 15.181, + "args": { + "External id": 514380,"Record function id": 0, "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123545.576, "dur": 10.991, + "args": { + "External id": 514381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123550.020, "dur": 5.661, + "args": { + "External id": 514382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123551.515, "dur": 4.080, + "args": { + "External id": 514383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123561.491, "dur": 4.349, + "args": { + "External id": 514384,"Record function id": 0, "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123562.858, "dur": 2.529, + "args": { + "External id": 514385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123563.574, "dur": 1.350, + "args": { + "External id": 514386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123564.056, "dur": 0.771, + "args": { + "External id": 514387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123569.173, "dur": 7.230, + "args": { + "External id": 514388,"Record function id": 0, "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123570.680, "dur": 5.282, + "args": { + "External id": 514389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123571.307, "dur": 4.263, + "args": { + "External id": 514390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123572.183, "dur": 3.304, + "args": { + "External id": 514391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123579.570, "dur": 4.244, + "args": { + "External id": 514392,"Record function id": 0, "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123580.835, "dur": 2.525, + "args": { + "External id": 514393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123581.514, "dur": 1.455, + "args": { + "External id": 514394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123582.009, "dur": 0.888, + "args": { + "External id": 514395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123606.832, "dur": 10.043, + "args": { + "External id": 514396,"Record function id": 0, "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123610.202, "dur": 5.659, + "args": { + "External id": 514397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123611.728, "dur": 2.926, + "args": { + "External id": 514398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123612.618, "dur": 1.721, + "args": { + "External id": 514399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123622.735, "dur": 9.372, + "args": { + "External id": 514400,"Record function id": 0, "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123624.789, "dur": 6.605, + "args": { + "External id": 514401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123625.726, "dur": 5.036, + "args": { + "External id": 514402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123629.242, "dur": 1.376, + "args": { + "External id": 514403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123637.061, "dur": 6.198, + "args": { + "External id": 514404,"Record function id": 0, "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123639.098, "dur": 3.448, + "args": { + "External id": 514405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123640.068, "dur": 1.841, + "args": { + "External id": 514406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123640.794, "dur": 0.948, + "args": { + "External id": 514407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123648.055, "dur": 6.548, + "args": { + "External id": 514408,"Record function id": 0, "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123649.790, "dur": 4.105, + "args": { + "External id": 514409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123650.798, "dur": 2.237, + "args": { + "External id": 514410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123651.682, "dur": 1.203, + "args": { + "External id": 514411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123659.484, "dur": 6.683, + "args": { + "External id": 514412,"Record function id": 0, "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145123662.017, "dur": 3.456, + "args": { + "External id": 514413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123663.075, "dur": 1.794, + "args": { + "External id": 514414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145123663.834, "dur": 0.877, + "args": { + "External id": 514415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145123671.794, "dur": 289897.679, + "args": { + "External id": 514416,"Record function id": 0, "Sequence number": 5516290, "Fwd thread id": 1, "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145123674.002, "dur": 289885.191, + "args": { + "External id": 514417,"Sequence number": 5516290, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7536 + } + }, + { + "ph": "f", "id": 397, "pid": 2338711, "tid": 2379440, "ts": 6274145123674.002, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274145123713.866, "dur": 44.072, + "args": { + "External id": 514418,"Record function id": 0, "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274145123766.971, "dur": 70.638, + "args": { + "External id": 514419,"Record function id": 0, "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338711, "tid": 2379440, + "ts": 6274145123843.947, "dur": 289706.916, + "args": { + "External id": 514420,"Record function id": 0, "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145123903.422, "dur": 9.459, + "args": { + "External id": 514421,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145123925.790, "dur": 7.042, + "args": { + "External id": 514422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145123950.458, "dur": 288523.144, + "args": { + "External id": 514423,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145123966.360, "dur": 288492.357, + "args": { + "External id": 514424,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145124141.872, "dur": 7.098, + "args": { + "External id": 514425,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145124173.677, "dur": 288230.829, + "args": { + "External id": 514426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145124177.232, "dur": 288225.812, + "args": { + "External id": 514427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145124181.619, "dur": 14.040, + "args": { + "External id": 514428,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145124197.696, "dur": 288197.993, + "args": { + "External id": 514429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145412613.890, "dur": 21.624, + "args": { + "External id": 514430,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145412621.117, "dur": 13.627, + "args": { + "External id": 514431,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145412680.316, "dur": 429.345, + "args": { + "External id": 514432,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145412715.149, "dur": 388.303, + "args": { + "External id": 514433,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7552, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145412730.399, "dur": 366.199, + "args": { + "External id": 514434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145413138.374, "dur": 2.911, + "args": { + "External id": 514435,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7554, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413216.429, "dur": 7.965, + "args": { + "External id": 514436,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413238.632, "dur": 41.955, + "args": { + "External id": 514437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413293.979, "dur": 1.900, + "args": { + "External id": 514438,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413301.230, "dur": 13.265, + "args": { + "External id": 514439,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413321.195, "dur": 1.187, + "args": { + "External id": 514440,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413326.976, "dur": 13.451, + "args": { + "External id": 514441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413345.856, "dur": 1.422, + "args": { + "External id": 514442,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413351.943, "dur": 12.886, + "args": { + "External id": 514443,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413372.343, "dur": 1.084, + "args": { + "External id": 514444,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413377.542, "dur": 13.832, + "args": { + "External id": 514445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413396.244, "dur": 3.579, + "args": { + "External id": 514446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413404.623, "dur": 11.647, + "args": { + "External id": 514447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413421.445, "dur": 0.899, + "args": { + "External id": 514448,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413426.398, "dur": 13.626, + "args": { + "External id": 514449,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413446.979, "dur": 0.901, + "args": { + "External id": 514450,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413452.364, "dur": 11.514, + "args": { + "External id": 514451,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413468.496, "dur": 1.138, + "args": { + "External id": 514452,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413473.198, "dur": 12.555, + "args": { + "External id": 514453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145413584.781, "dur": 3227.656, + "args": { + "External id": 514454,"Record function id": 0, "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274145413630.446, "dur": 1165.989, + "args": { + "External id": 514455,"Record function id": 0, "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274145413653.630, "dur": 365.865, + "args": { + "External id": 514456,"Record function id": 0, "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413746.861, "dur": 5.480, + "args": { + "External id": 514457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413756.589, "dur": 0.736, + "args": { + "External id": 514458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413759.234, "dur": 1.165, + "args": { + "External id": 514459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413762.353, "dur": 1.149, + "args": { + "External id": 514460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413767.108, "dur": 3.405, + "args": { + "External id": 514461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413772.267, "dur": 0.749, + "args": { + "External id": 514462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413774.538, "dur": 1.083, + "args": { + "External id": 514463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413784.690, "dur": 1.116, + "args": { + "External id": 514464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413789.382, "dur": 1.211, + "args": { + "External id": 514465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145413791.800, "dur": 1.026, + "args": { + "External id": 514466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145413812.674, "dur": 171.016, + "args": { + "External id": 514467,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145413829.774, "dur": 148.856, + "args": { + "External id": 514468,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145413854.239, "dur": 15.829, + "args": { + "External id": 514469,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145413873.914, "dur": 73.838, + "args": { + "External id": 514470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145413876.834, "dur": 70.490, + "args": { + "External id": 514471,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145413880.857, "dur": 6.641, + "args": { + "External id": 514472,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145413891.641, "dur": 54.925, + "args": { + "External id": 514473,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338711, "tid": 2379440, + "ts": 6274145414151.403, "dur": 637.306, + "args": { + "External id": 514474,"Record function id": 0, "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274145414170.997, "dur": 604.407, + "args": { + "External id": 514475,"Record function id": 0, "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145414230.646, "dur": 8.676, + "args": { + "External id": 514476,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145414256.592, "dur": 33.647, + "args": { + "External id": 514477,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414262.367, "dur": 1.919, + "args": { + "External id": 514478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414266.486, "dur": 2.051, + "args": { + "External id": 514479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414269.597, "dur": 0.597, + "args": { + "External id": 514480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414271.764, "dur": 0.493, + "args": { + "External id": 514481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414274.903, "dur": 0.440, + "args": { + "External id": 514482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414277.295, "dur": 0.441, + "args": { + "External id": 514483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414278.903, "dur": 0.620, + "args": { + "External id": 514484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414282.757, "dur": 2.305, + "args": { + "External id": 514485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414286.307, "dur": 0.276, + "args": { + "External id": 514486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145414301.283, "dur": 45.753, + "args": { + "External id": 514487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145414379.288, "dur": 113.672, + "args": { + "External id": 514488,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145414390.835, "dur": 4.176, + "args": { + "External id": 514489,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145414400.414, "dur": 11.184, + "args": { + "External id": 514490,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145414405.001, "dur": 6.141, + "args": { + "External id": 514491,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414408.486, "dur": 1.129, + "args": { + "External id": 514492,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145414418.757, "dur": 26.077, + "args": { + "External id": 514493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414420.791, "dur": 0.624, + "args": { + "External id": 514494,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414423.979, "dur": 0.439, + "args": { + "External id": 514495,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414425.899, "dur": 0.525, + "args": { + "External id": 514496,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414427.707, "dur": 1.380, + "args": { + "External id": 514497,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414430.662, "dur": 2.506, + "args": { + "External id": 514498,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414434.465, "dur": 0.516, + "args": { + "External id": 514499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414438.036, "dur": 0.419, + "args": { + "External id": 514500,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414439.251, "dur": 0.298, + "args": { + "External id": 514501,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145414441.199, "dur": 0.297, + "args": { + "External id": 514502,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145414455.041, "dur": 30.834, + "args": { + "External id": 514503,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145414539.002, "dur": 150.729, + "args": { + "External id": 514504,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145414569.139, "dur": 116.392, + "args": { + "External id": 514505,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145414581.726, "dur": 99.064, + "args": { + "External id": 514506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145414711.988, "dur": 2.076, + "args": { + "External id": 514507,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7626, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145414804.965, "dur": 1983.560, + "args": { + "External id": 514508,"Sequence number": 5516289, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7627 + } + }, + { + "ph": "f", "id": 398, "pid": 2338711, "tid": 2379440, "ts": 6274145414804.965, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145414919.449, "dur": 115.847, + "args": { + "External id": 514509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145415124.504, "dur": 45.213, + "args": { + "External id": 514510,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145415190.539, "dur": 58.790, + "args": { + "External id": 514511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145415260.655, "dur": 34.353, + "args": { + "External id": 514512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145415304.140, "dur": 34.769, + "args": { + "External id": 514513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145415347.058, "dur": 29.824, + "args": { + "External id": 514514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145415384.643, "dur": 31.864, + "args": { + "External id": 514515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145415442.531, "dur": 26.758, + "args": { + "External id": 514516,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145415488.257, "dur": 31.844, + "args": { + "External id": 514517,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145415545.610, "dur": 20.299, + "args": { + "External id": 514518,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145415580.615, "dur": 40.444, + "args": { + "External id": 514519,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145415635.460, "dur": 55.388, + "args": { + "External id": 514520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145415695.852, "dur": 45.294, + "args": { + "External id": 514521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145415777.021, "dur": 269.914, + "args": { + "External id": 514522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145415863.583, "dur": 6.976, + "args": { + "External id": 514523,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145415872.438, "dur": 2.881, + "args": { + "External id": 514524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145415876.485, "dur": 2.705, + "args": { + "External id": 514525,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145415880.344, "dur": 4.563, + "args": { + "External id": 514526,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145415936.177, "dur": 5.820, + "args": { + "External id": 514527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145415938.366, "dur": 3.386, + "args": { + "External id": 514528,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145415944.172, "dur": 35.118, + "args": { + "External id": 514529,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145415949.737, "dur": 2.838, + "args": { + "External id": 514530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145415980.870, "dur": 1.792, + "args": { + "External id": 514531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145415981.787, "dur": 0.770, + "args": { + "External id": 514532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145415984.552, "dur": 16.517, + "args": { + "External id": 514533,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145415987.138, "dur": 0.549, + "args": { + "External id": 514534,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145416130.945, "dur": 38.195, + "args": { + "External id": 514535,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145416200.383, "dur": 20.901, + "args": { + "External id": 514536,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145416233.372, "dur": 51.921, + "args": { + "External id": 514537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145416291.955, "dur": 52.432, + "args": { + "External id": 514538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145416360.802, "dur": 27.578, + "args": { + "External id": 514539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145416393.931, "dur": 35.329, + "args": { + "External id": 514540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145416438.868, "dur": 32.146, + "args": { + "External id": 514541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145416476.930, "dur": 33.540, + "args": { + "External id": 514542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145416533.741, "dur": 27.124, + "args": { + "External id": 514543,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145416578.273, "dur": 45.876, + "args": { + "External id": 514544,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145416647.585, "dur": 26.859, + "args": { + "External id": 514545,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145416693.727, "dur": 16.695, + "args": { + "External id": 514546,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145416732.569, "dur": 19.488, + "args": { + "External id": 514547,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416836.456, "dur": 17.157, + "args": { + "External id": 514548,"Record function id": 0, "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416839.754, "dur": 12.841, + "args": { + "External id": 514549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416844.757, "dur": 6.884, + "args": { + "External id": 514550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416847.216, "dur": 4.334, + "args": { + "External id": 514551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416857.733, "dur": 5.171, + "args": { + "External id": 514552,"Record function id": 0, "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416859.303, "dur": 3.175, + "args": { + "External id": 514553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416860.250, "dur": 1.725, + "args": { + "External id": 514554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416860.944, "dur": 0.905, + "args": { + "External id": 514555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416866.173, "dur": 4.930, + "args": { + "External id": 514556,"Record function id": 0, "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416867.616, "dur": 3.048, + "args": { + "External id": 514557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416868.461, "dur": 1.815, + "args": { + "External id": 514558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416869.361, "dur": 0.791, + "args": { + "External id": 514559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416874.275, "dur": 30.708, + "args": { + "External id": 514560,"Record function id": 0, "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416902.082, "dur": 2.481, + "args": { + "External id": 514561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416902.993, "dur": 1.166, + "args": { + "External id": 514562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416903.358, "dur": 0.700, + "args": { + "External id": 514563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416908.175, "dur": 4.068, + "args": { + "External id": 514564,"Record function id": 0, "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416909.610, "dur": 2.247, + "args": { + "External id": 514565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416910.238, "dur": 1.092, + "args": { + "External id": 514566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416910.549, "dur": 0.701, + "args": { + "External id": 514567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416915.331, "dur": 6.257, + "args": { + "External id": 514568,"Record function id": 0, "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416916.499, "dur": 4.700, + "args": { + "External id": 514569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416917.143, "dur": 3.630, + "args": { + "External id": 514570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416920.091, "dur": 0.566, + "args": { + "External id": 514571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416925.022, "dur": 5.949, + "args": { + "External id": 514572,"Record function id": 0, "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416926.345, "dur": 4.208, + "args": { + "External id": 514573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416926.809, "dur": 3.357, + "args": { + "External id": 514574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416927.078, "dur": 3.020, + "args": { + "External id": 514575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416934.377, "dur": 3.561, + "args": { + "External id": 514576,"Record function id": 0, "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416935.373, "dur": 2.172, + "args": { + "External id": 514577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416935.862, "dur": 1.254, + "args": { + "External id": 514578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416936.242, "dur": 0.783, + "args": { + "External id": 514579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416941.538, "dur": 3.403, + "args": { + "External id": 514580,"Record function id": 0, "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145416942.633, "dur": 1.915, + "args": { + "External id": 514581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416943.070, "dur": 1.088, + "args": { + "External id": 514582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145416943.422, "dur": 0.658, + "args": { + "External id": 514583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145416948.759, "dur": 65079.535, + "args": { + "External id": 514584,"Record function id": 0, "Sequence number": 5516288, "Fwd thread id": 1, "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145416949.926, "dur": 65068.103, + "args": { + "External id": 514585,"Sequence number": 5516288, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7704 + } + }, + { + "ph": "f", "id": 399, "pid": 2338711, "tid": 2379440, "ts": 6274145416949.926, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274145416984.078, "dur": 44.544, + "args": { + "External id": 514586,"Record function id": 0, "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274145417037.607, "dur": 106.900, + "args": { + "External id": 514587,"Record function id": 0, "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338711, "tid": 2379440, + "ts": 6274145417153.052, "dur": 64856.614, + "args": { + "External id": 514588,"Record function id": 0, "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145417252.892, "dur": 8.748, + "args": { + "External id": 514589,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145417272.702, "dur": 5.232, + "args": { + "External id": 514590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145417295.097, "dur": 63689.315, + "args": { + "External id": 514591,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145417309.416, "dur": 63661.225, + "args": { + "External id": 514592,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145417426.609, "dur": 19.887, + "args": { + "External id": 514593,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145417468.272, "dur": 63453.438, + "args": { + "External id": 514594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145417471.202, "dur": 63449.148, + "args": { + "External id": 514595,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145417476.470, "dur": 10.075, + "args": { + "External id": 514596,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145417488.408, "dur": 63425.560, + "args": { + "External id": 514597,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145481134.997, "dur": 13.350, + "args": { + "External id": 514598,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145481138.797, "dur": 8.918, + "args": { + "External id": 514599,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481180.140, "dur": 376.028, + "args": { + "External id": 514600,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145481215.007, "dur": 335.936, + "args": { + "External id": 514601,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145481230.707, "dur": 314.928, + "args": { + "External id": 514602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145481579.557, "dur": 2.306, + "args": { + "External id": 514603,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7722, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481679.477, "dur": 9.530, + "args": { + "External id": 514604,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481702.218, "dur": 41.115, + "args": { + "External id": 514605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481754.268, "dur": 1.503, + "args": { + "External id": 514606,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481761.034, "dur": 13.564, + "args": { + "External id": 514607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481780.318, "dur": 0.896, + "args": { + "External id": 514608,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481787.930, "dur": 13.762, + "args": { + "External id": 514609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481806.813, "dur": 0.932, + "args": { + "External id": 514610,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481811.885, "dur": 11.705, + "args": { + "External id": 514611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481828.037, "dur": 0.984, + "args": { + "External id": 514612,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481832.232, "dur": 13.026, + "args": { + "External id": 514613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481849.710, "dur": 1.567, + "args": { + "External id": 514614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481855.727, "dur": 11.945, + "args": { + "External id": 514615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481874.291, "dur": 0.902, + "args": { + "External id": 514616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481884.584, "dur": 12.715, + "args": { + "External id": 514617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481904.094, "dur": 0.891, + "args": { + "External id": 514618,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481908.652, "dur": 11.243, + "args": { + "External id": 514619,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145481923.952, "dur": 3.084, + "args": { + "External id": 514620,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145481933.588, "dur": 12.305, + "args": { + "External id": 514621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145482045.872, "dur": 3172.094, + "args": { + "External id": 514622,"Record function id": 0, "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274145482104.059, "dur": 1145.574, + "args": { + "External id": 514623,"Record function id": 0, "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274145482123.404, "dur": 336.212, + "args": { + "External id": 514624,"Record function id": 0, "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482212.624, "dur": 6.947, + "args": { + "External id": 514625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482223.384, "dur": 0.980, + "args": { + "External id": 514626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482226.423, "dur": 0.803, + "args": { + "External id": 514627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482228.973, "dur": 0.624, + "args": { + "External id": 514628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482231.233, "dur": 0.726, + "args": { + "External id": 514629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482233.321, "dur": 0.982, + "args": { + "External id": 514630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482235.898, "dur": 0.671, + "args": { + "External id": 514631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482240.001, "dur": 3.557, + "args": { + "External id": 514632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482245.234, "dur": 0.876, + "args": { + "External id": 514633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145482247.375, "dur": 0.869, + "args": { + "External id": 514634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145482266.730, "dur": 160.599, + "args": { + "External id": 514635,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145482283.580, "dur": 139.042, + "args": { + "External id": 514636,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145482307.516, "dur": 14.863, + "args": { + "External id": 514637,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145482326.429, "dur": 66.933, + "args": { + "External id": 514638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145482329.142, "dur": 63.930, + "args": { + "External id": 514639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482333.287, "dur": 6.304, + "args": { + "External id": 514640,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145482341.289, "dur": 50.974, + "args": { + "External id": 514641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338711, "tid": 2379440, + "ts": 6274145482544.070, "dur": 697.414, + "args": { + "External id": 514642,"Record function id": 0, "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274145482561.119, "dur": 666.806, + "args": { + "External id": 514643,"Record function id": 0, "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145482648.182, "dur": 8.332, + "args": { + "External id": 514644,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145482676.012, "dur": 33.436, + "args": { + "External id": 514645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482681.689, "dur": 2.894, + "args": { + "External id": 514646,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482686.719, "dur": 0.522, + "args": { + "External id": 514647,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482688.445, "dur": 2.929, + "args": { + "External id": 514648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482694.355, "dur": 0.380, + "args": { + "External id": 514649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482695.908, "dur": 0.557, + "args": { + "External id": 514650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482697.785, "dur": 0.300, + "args": { + "External id": 514651,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482700.675, "dur": 0.312, + "args": { + "External id": 514652,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482702.828, "dur": 0.275, + "args": { + "External id": 514653,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482704.173, "dur": 1.603, + "args": { + "External id": 514654,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145482721.834, "dur": 46.891, + "args": { + "External id": 514655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145482802.638, "dur": 121.487, + "args": { + "External id": 514656,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145482813.646, "dur": 4.133, + "args": { + "External id": 514657,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145482822.951, "dur": 12.608, + "args": { + "External id": 514658,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145482827.555, "dur": 7.569, + "args": { + "External id": 514659,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482831.019, "dur": 2.787, + "args": { + "External id": 514660,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145482842.852, "dur": 28.696, + "args": { + "External id": 514661,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482844.717, "dur": 0.465, + "args": { + "External id": 514662,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482846.930, "dur": 0.477, + "args": { + "External id": 514663,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482849.224, "dur": 1.709, + "args": { + "External id": 514664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482851.987, "dur": 0.539, + "args": { + "External id": 514665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482854.105, "dur": 0.428, + "args": { + "External id": 514666,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482857.526, "dur": 0.300, + "args": { + "External id": 514667,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482859.253, "dur": 0.358, + "args": { + "External id": 514668,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482861.480, "dur": 2.492, + "args": { + "External id": 514669,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145482867.906, "dur": 0.296, + "args": { + "External id": 514670,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145482884.092, "dur": 32.573, + "args": { + "External id": 514671,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145482972.094, "dur": 171.004, + "args": { + "External id": 514672,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145483002.747, "dur": 135.874, + "args": { + "External id": 514673,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7792, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145483014.598, "dur": 118.698, + "args": { + "External id": 514674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145483163.158, "dur": 2.194, + "args": { + "External id": 514675,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7794, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145483257.443, "dur": 1936.150, + "args": { + "External id": 514676,"Sequence number": 5516287, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7795 + } + }, + { + "ph": "f", "id": 400, "pid": 2338711, "tid": 2379440, "ts": 6274145483257.443, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145483372.274, "dur": 113.137, + "args": { + "External id": 514677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145483530.101, "dur": 41.128, + "args": { + "External id": 514678,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145483616.528, "dur": 74.446, + "args": { + "External id": 514679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145483707.272, "dur": 41.434, + "args": { + "External id": 514680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145483756.128, "dur": 34.562, + "args": { + "External id": 514681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145483797.161, "dur": 29.734, + "args": { + "External id": 514682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145483836.812, "dur": 30.452, + "args": { + "External id": 514683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145483897.376, "dur": 27.032, + "args": { + "External id": 514684,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145483943.993, "dur": 28.319, + "args": { + "External id": 514685,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145483994.560, "dur": 20.955, + "args": { + "External id": 514686,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145484029.776, "dur": 16.174, + "args": { + "External id": 514687,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484095.322, "dur": 48.531, + "args": { + "External id": 514688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484147.983, "dur": 35.104, + "args": { + "External id": 514689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145484217.904, "dur": 263.602, + "args": { + "External id": 514690,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145484304.697, "dur": 6.268, + "args": { + "External id": 514691,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145484313.066, "dur": 3.241, + "args": { + "External id": 514692,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145484318.051, "dur": 4.800, + "args": { + "External id": 514693,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145484323.932, "dur": 2.061, + "args": { + "External id": 514694,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145484372.647, "dur": 9.414, + "args": { + "External id": 514695,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145484378.688, "dur": 3.147, + "args": { + "External id": 514696,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145484384.305, "dur": 33.581, + "args": { + "External id": 514697,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145484390.713, "dur": 2.034, + "args": { + "External id": 514698,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145484419.498, "dur": 1.869, + "args": { + "External id": 514699,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145484420.761, "dur": 0.519, + "args": { + "External id": 514700,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145484422.582, "dur": 15.777, + "args": { + "External id": 514701,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145484424.998, "dur": 0.401, + "args": { + "External id": 514702,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145484520.145, "dur": 27.702, + "args": { + "External id": 514703,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145484567.479, "dur": 56.774, + "args": { + "External id": 514704,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484651.005, "dur": 61.611, + "args": { + "External id": 514705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484722.992, "dur": 41.597, + "args": { + "External id": 514706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484775.745, "dur": 23.170, + "args": { + "External id": 514707,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484803.929, "dur": 31.853, + "args": { + "External id": 514708,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484842.321, "dur": 27.835, + "args": { + "External id": 514709,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145484876.294, "dur": 30.976, + "args": { + "External id": 514710,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145484937.734, "dur": 27.947, + "args": { + "External id": 514711,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145484983.064, "dur": 26.725, + "args": { + "External id": 514712,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145485028.117, "dur": 19.984, + "args": { + "External id": 514713,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145485098.989, "dur": 20.675, + "args": { + "External id": 514714,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145485137.873, "dur": 20.000, + "args": { + "External id": 514715,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485242.154, "dur": 17.055, + "args": { + "External id": 514716,"Record function id": 0, "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485246.140, "dur": 12.013, + "args": { + "External id": 514717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485250.848, "dur": 6.227, + "args": { + "External id": 514718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485252.430, "dur": 4.555, + "args": { + "External id": 514719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485263.153, "dur": 7.913, + "args": { + "External id": 514720,"Record function id": 0, "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485265.256, "dur": 5.380, + "args": { + "External id": 514721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485265.842, "dur": 4.318, + "args": { + "External id": 514722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485266.579, "dur": 3.501, + "args": { + "External id": 514723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485274.351, "dur": 4.966, + "args": { + "External id": 514724,"Record function id": 0, "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485275.805, "dur": 3.094, + "args": { + "External id": 514725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485276.641, "dur": 1.787, + "args": { + "External id": 514726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485277.380, "dur": 0.944, + "args": { + "External id": 514727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485282.531, "dur": 4.278, + "args": { + "External id": 514728,"Record function id": 0, "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485284.111, "dur": 2.318, + "args": { + "External id": 514729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485284.785, "dur": 1.204, + "args": { + "External id": 514730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485285.309, "dur": 0.596, + "args": { + "External id": 514731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485289.854, "dur": 5.246, + "args": { + "External id": 514732,"Record function id": 0, "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485291.676, "dur": 2.986, + "args": { + "External id": 514733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485292.785, "dur": 1.424, + "args": { + "External id": 514734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485293.285, "dur": 0.837, + "args": { + "External id": 514735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485298.078, "dur": 4.315, + "args": { + "External id": 514736,"Record function id": 0, "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485299.529, "dur": 2.462, + "args": { + "External id": 514737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485300.341, "dur": 1.245, + "args": { + "External id": 514738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485300.939, "dur": 0.570, + "args": { + "External id": 514739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485305.448, "dur": 3.371, + "args": { + "External id": 514740,"Record function id": 0, "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485306.588, "dur": 1.828, + "args": { + "External id": 514741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485307.063, "dur": 0.951, + "args": { + "External id": 514742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485307.322, "dur": 0.617, + "args": { + "External id": 514743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485311.765, "dur": 6.086, + "args": { + "External id": 514744,"Record function id": 0, "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485312.894, "dur": 4.559, + "args": { + "External id": 514745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485313.487, "dur": 3.228, + "args": { + "External id": 514746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485315.972, "dur": 0.681, + "args": { + "External id": 514747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485320.773, "dur": 6.349, + "args": { + "External id": 514748,"Record function id": 0, "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145485321.824, "dur": 4.895, + "args": { + "External id": 514749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485322.449, "dur": 3.766, + "args": { + "External id": 514750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145485323.021, "dur": 3.121, + "args": { + "External id": 514751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145485331.108, "dur": 62612.515, + "args": { + "External id": 514752,"Record function id": 0, "Sequence number": 5516286, "Fwd thread id": 1, "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145485332.252, "dur": 62601.454, + "args": { + "External id": 514753,"Sequence number": 5516286, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7872 + } + }, + { + "ph": "f", "id": 401, "pid": 2338711, "tid": 2379440, "ts": 6274145485332.252, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274145485367.673, "dur": 44.194, + "args": { + "External id": 514754,"Record function id": 0, "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274145485421.359, "dur": 69.732, + "args": { + "External id": 514755,"Record function id": 0, "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338711, "tid": 2379440, + "ts": 6274145485497.304, "dur": 62427.762, + "args": { + "External id": 514756,"Record function id": 0, "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145485610.132, "dur": 10.526, + "args": { + "External id": 514757,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145485635.681, "dur": 6.152, + "args": { + "External id": 514758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145485661.048, "dur": 61198.841, + "args": { + "External id": 514759,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145485679.442, "dur": 61165.839, + "args": { + "External id": 514760,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145485794.850, "dur": 20.220, + "args": { + "External id": 514761,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145485837.644, "dur": 60957.876, + "args": { + "External id": 514762,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145485840.806, "dur": 60953.497, + "args": { + "External id": 514763,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145485847.136, "dur": 8.804, + "args": { + "External id": 514764,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145485857.854, "dur": 60929.458, + "args": { + "External id": 514765,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145546983.314, "dur": 14.120, + "args": { + "External id": 514766,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145546987.188, "dur": 9.820, + "args": { + "External id": 514767,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547029.935, "dur": 414.875, + "args": { + "External id": 514768,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145547095.388, "dur": 343.962, + "args": { + "External id": 514769,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7888, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145547111.207, "dur": 322.275, + "args": { + "External id": 514770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145547469.708, "dur": 2.542, + "args": { + "External id": 514771,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7890, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547536.409, "dur": 7.248, + "args": { + "External id": 514772,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547557.562, "dur": 60.489, + "args": { + "External id": 514773,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547634.813, "dur": 3.052, + "args": { + "External id": 514774,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547645.452, "dur": 21.835, + "args": { + "External id": 514775,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547675.577, "dur": 4.050, + "args": { + "External id": 514776,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547694.585, "dur": 18.014, + "args": { + "External id": 514777,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547722.399, "dur": 1.373, + "args": { + "External id": 514778,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547729.739, "dur": 12.007, + "args": { + "External id": 514779,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547746.750, "dur": 1.038, + "args": { + "External id": 514780,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547752.123, "dur": 12.363, + "args": { + "External id": 514781,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547769.019, "dur": 2.456, + "args": { + "External id": 514782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547775.778, "dur": 11.663, + "args": { + "External id": 514783,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547794.154, "dur": 0.933, + "args": { + "External id": 514784,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547798.648, "dur": 13.366, + "args": { + "External id": 514785,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547816.243, "dur": 1.002, + "args": { + "External id": 514786,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547821.288, "dur": 11.546, + "args": { + "External id": 514787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145547836.594, "dur": 0.903, + "args": { + "External id": 514788,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145547841.067, "dur": 13.472, + "args": { + "External id": 514789,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145547962.244, "dur": 3162.823, + "args": { + "External id": 514790,"Record function id": 0, "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274145547986.247, "dur": 1215.273, + "args": { + "External id": 514791,"Record function id": 0, "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274145548005.602, "dur": 393.120, + "args": { + "External id": 514792,"Record function id": 0, "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548134.413, "dur": 5.480, + "args": { + "External id": 514793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548144.617, "dur": 3.806, + "args": { + "External id": 514794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548150.135, "dur": 0.973, + "args": { + "External id": 514795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548153.236, "dur": 0.977, + "args": { + "External id": 514796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548155.773, "dur": 0.961, + "args": { + "External id": 514797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548158.280, "dur": 0.875, + "args": { + "External id": 514798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548160.828, "dur": 0.821, + "args": { + "External id": 514799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548165.260, "dur": 1.313, + "args": { + "External id": 514800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548167.906, "dur": 0.982, + "args": { + "External id": 514801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145548170.306, "dur": 2.971, + "args": { + "External id": 514802,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145548197.312, "dur": 166.997, + "args": { + "External id": 514803,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145548214.889, "dur": 144.544, + "args": { + "External id": 514804,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145548232.457, "dur": 14.599, + "args": { + "External id": 514805,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145548257.410, "dur": 72.675, + "args": { + "External id": 514806,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145548260.155, "dur": 69.475, + "args": { + "External id": 514807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548266.989, "dur": 6.535, + "args": { + "External id": 514808,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145548275.680, "dur": 53.385, + "args": { + "External id": 514809,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338711, "tid": 2379440, + "ts": 6274145548484.699, "dur": 707.818, + "args": { + "External id": 514810,"Record function id": 0, "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274145548502.697, "dur": 674.727, + "args": { + "External id": 514811,"Record function id": 0, "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145548560.620, "dur": 4.880, + "args": { + "External id": 514812,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145548582.082, "dur": 59.429, + "args": { + "External id": 514813,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548605.356, "dur": 4.610, + "args": { + "External id": 514814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548613.010, "dur": 0.597, + "args": { + "External id": 514815,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548615.740, "dur": 0.702, + "args": { + "External id": 514816,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548619.819, "dur": 0.817, + "args": { + "External id": 514817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548622.224, "dur": 2.748, + "args": { + "External id": 514818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548626.362, "dur": 0.440, + "args": { + "External id": 514819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548630.188, "dur": 0.470, + "args": { + "External id": 514820,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548632.087, "dur": 0.797, + "args": { + "External id": 514821,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548634.351, "dur": 2.333, + "args": { + "External id": 514822,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145548660.413, "dur": 51.399, + "args": { + "External id": 514823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145548749.022, "dur": 116.857, + "args": { + "External id": 514824,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145548760.902, "dur": 6.559, + "args": { + "External id": 514825,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145548772.880, "dur": 11.379, + "args": { + "External id": 514826,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145548777.804, "dur": 6.023, + "args": { + "External id": 514827,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548781.194, "dur": 0.793, + "args": { + "External id": 514828,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145548792.574, "dur": 22.754, + "args": { + "External id": 514829,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548794.413, "dur": 0.479, + "args": { + "External id": 514830,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548795.822, "dur": 3.623, + "args": { + "External id": 514831,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548800.613, "dur": 0.478, + "args": { + "External id": 514832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548801.809, "dur": 0.424, + "args": { + "External id": 514833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548805.524, "dur": 0.377, + "args": { + "External id": 514834,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548806.634, "dur": 0.449, + "args": { + "External id": 514835,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548807.964, "dur": 0.362, + "args": { + "External id": 514836,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548810.242, "dur": 0.376, + "args": { + "External id": 514837,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145548811.387, "dur": 0.415, + "args": { + "External id": 514838,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145548825.680, "dur": 30.362, + "args": { + "External id": 514839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145548915.894, "dur": 135.394, + "args": { + "External id": 514840,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145548950.973, "dur": 96.309, + "args": { + "External id": 514841,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7960, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145548961.658, "dur": 80.968, + "args": { + "External id": 514842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145549110.189, "dur": 2.978, + "args": { + "External id": 514843,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7962, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145549218.917, "dur": 1881.292, + "args": { + "External id": 514844,"Sequence number": 5516285, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7963 + } + }, + { + "ph": "f", "id": 402, "pid": 2338711, "tid": 2379440, "ts": 6274145549218.917, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145549336.292, "dur": 119.016, + "args": { + "External id": 514845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145549506.741, "dur": 42.004, + "args": { + "External id": 514846,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145549568.213, "dur": 86.802, + "args": { + "External id": 514847,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145549673.134, "dur": 38.526, + "args": { + "External id": 514848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145549719.562, "dur": 34.013, + "args": { + "External id": 514849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145549760.654, "dur": 29.323, + "args": { + "External id": 514850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145549799.525, "dur": 31.490, + "args": { + "External id": 514851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145549863.677, "dur": 26.613, + "args": { + "External id": 514852,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145549912.610, "dur": 30.848, + "args": { + "External id": 514853,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145549963.785, "dur": 18.440, + "args": { + "External id": 514854,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145549999.118, "dur": 14.212, + "args": { + "External id": 514855,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550022.419, "dur": 74.346, + "args": { + "External id": 514856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550103.675, "dur": 39.607, + "args": { + "External id": 514857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145550177.557, "dur": 255.895, + "args": { + "External id": 514858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145550262.177, "dur": 6.224, + "args": { + "External id": 514859,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145550270.225, "dur": 2.348, + "args": { + "External id": 514860,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145550273.660, "dur": 2.269, + "args": { + "External id": 514861,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145550277.161, "dur": 6.173, + "args": { + "External id": 514862,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145550331.490, "dur": 5.195, + "args": { + "External id": 514863,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145550333.620, "dur": 2.824, + "args": { + "External id": 514864,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145550339.062, "dur": 32.211, + "args": { + "External id": 514865,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145550344.608, "dur": 1.802, + "args": { + "External id": 514866,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145550372.721, "dur": 1.464, + "args": { + "External id": 514867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145550373.528, "dur": 0.542, + "args": { + "External id": 514868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145550377.594, "dur": 14.449, + "args": { + "External id": 514869,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145550380.000, "dur": 0.581, + "args": { + "External id": 514870,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145550482.135, "dur": 38.194, + "args": { + "External id": 514871,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145550544.050, "dur": 17.020, + "args": { + "External id": 514872,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550570.888, "dur": 67.762, + "args": { + "External id": 514873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550647.751, "dur": 42.180, + "args": { + "External id": 514874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550704.847, "dur": 21.617, + "args": { + "External id": 514875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550731.979, "dur": 32.458, + "args": { + "External id": 514876,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550771.662, "dur": 28.606, + "args": { + "External id": 514877,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145550806.804, "dur": 30.460, + "args": { + "External id": 514878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145550860.838, "dur": 24.380, + "args": { + "External id": 514879,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145550904.174, "dur": 27.946, + "args": { + "External id": 514880,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145550949.342, "dur": 18.877, + "args": { + "External id": 514881,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145550987.319, "dur": 15.521, + "args": { + "External id": 514882,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145551015.773, "dur": 19.013, + "args": { + "External id": 514883,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551150.708, "dur": 16.788, + "args": { + "External id": 514884,"Record function id": 0, "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551154.474, "dur": 12.040, + "args": { + "External id": 514885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551159.438, "dur": 6.033, + "args": { + "External id": 514886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551160.991, "dur": 4.385, + "args": { + "External id": 514887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551171.434, "dur": 4.973, + "args": { + "External id": 514888,"Record function id": 0, "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551172.991, "dur": 3.031, + "args": { + "External id": 514889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551173.841, "dur": 1.701, + "args": { + "External id": 514890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551174.304, "dur": 1.136, + "args": { + "External id": 514891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551179.743, "dur": 7.300, + "args": { + "External id": 514892,"Record function id": 0, "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551181.183, "dur": 5.367, + "args": { + "External id": 514893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551181.985, "dur": 4.080, + "args": { + "External id": 514894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551182.582, "dur": 3.412, + "args": { + "External id": 514895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551190.267, "dur": 4.095, + "args": { + "External id": 514896,"Record function id": 0, "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551191.622, "dur": 2.346, + "args": { + "External id": 514897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551192.131, "dur": 1.403, + "args": { + "External id": 514898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551192.794, "dur": 0.655, + "args": { + "External id": 514899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551197.454, "dur": 3.967, + "args": { + "External id": 514900,"Record function id": 0, "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551198.556, "dur": 2.471, + "args": { + "External id": 514901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551199.009, "dur": 1.546, + "args": { + "External id": 514902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551199.488, "dur": 0.991, + "args": { + "External id": 514903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551204.460, "dur": 6.704, + "args": { + "External id": 514904,"Record function id": 0, "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551205.761, "dur": 5.026, + "args": { + "External id": 514905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551206.286, "dur": 4.068, + "args": { + "External id": 514906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551209.442, "dur": 0.840, + "args": { + "External id": 514907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551214.374, "dur": 4.064, + "args": { + "External id": 514908,"Record function id": 0, "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551215.381, "dur": 2.613, + "args": { + "External id": 514909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551216.142, "dur": 1.451, + "args": { + "External id": 514910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551216.779, "dur": 0.738, + "args": { + "External id": 514911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551221.522, "dur": 4.145, + "args": { + "External id": 514912,"Record function id": 0, "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551222.935, "dur": 2.305, + "args": { + "External id": 514913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551223.456, "dur": 1.326, + "args": { + "External id": 514914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551224.045, "dur": 0.663, + "args": { + "External id": 514915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551228.669, "dur": 4.468, + "args": { + "External id": 514916,"Record function id": 0, "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145551229.922, "dur": 2.811, + "args": { + "External id": 514917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551230.628, "dur": 1.623, + "args": { + "External id": 514918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145551231.061, "dur": 1.110, + "args": { + "External id": 514919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145551237.146, "dur": 63622.825, + "args": { + "External id": 514920,"Record function id": 0, "Sequence number": 5516284, "Fwd thread id": 1, "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145551238.556, "dur": 63610.782, + "args": { + "External id": 514921,"Sequence number": 5516284, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8040 + } + }, + { + "ph": "f", "id": 403, "pid": 2338711, "tid": 2379440, "ts": 6274145551238.556, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274145551275.909, "dur": 43.160, + "args": { + "External id": 514922,"Record function id": 0, "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274145551327.275, "dur": 66.568, + "args": { + "External id": 514923,"Record function id": 0, "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338711, "tid": 2379440, + "ts": 6274145551399.863, "dur": 63440.544, + "args": { + "External id": 514924,"Record function id": 0, "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145551497.198, "dur": 7.620, + "args": { + "External id": 514925,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145551514.581, "dur": 7.761, + "args": { + "External id": 514926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145551541.162, "dur": 62273.174, + "args": { + "External id": 514927,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145551555.992, "dur": 62243.812, + "args": { + "External id": 514928,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145551693.274, "dur": 27.373, + "args": { + "External id": 514929,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145551748.578, "dur": 62002.933, + "args": { + "External id": 514930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145551751.860, "dur": 61998.408, + "args": { + "External id": 514931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145551758.713, "dur": 13.497, + "args": { + "External id": 514932,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145551776.780, "dur": 61966.665, + "args": { + "External id": 514933,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145613938.864, "dur": 13.982, + "args": { + "External id": 514934,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145613943.042, "dur": 9.413, + "args": { + "External id": 514935,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145613989.680, "dur": 389.367, + "args": { + "External id": 514936,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145614024.123, "dur": 348.781, + "args": { + "External id": 514937,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8056, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145614037.838, "dur": 327.986, + "args": { + "External id": 514938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145614408.227, "dur": 2.848, + "args": { + "External id": 514939,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8058, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614476.814, "dur": 7.175, + "args": { + "External id": 514940,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614496.874, "dur": 37.752, + "args": { + "External id": 514941,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614546.203, "dur": 1.750, + "args": { + "External id": 514942,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614553.811, "dur": 13.939, + "args": { + "External id": 514943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614573.449, "dur": 1.320, + "args": { + "External id": 514944,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614579.406, "dur": 35.585, + "args": { + "External id": 514945,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614626.227, "dur": 1.909, + "args": { + "External id": 514946,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614634.218, "dur": 16.619, + "args": { + "External id": 514947,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614658.233, "dur": 3.552, + "args": { + "External id": 514948,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614666.698, "dur": 14.925, + "args": { + "External id": 514949,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614687.191, "dur": 1.519, + "args": { + "External id": 514950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614695.570, "dur": 11.360, + "args": { + "External id": 514951,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614711.826, "dur": 0.905, + "args": { + "External id": 514952,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614716.184, "dur": 12.899, + "args": { + "External id": 514953,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614733.579, "dur": 0.851, + "args": { + "External id": 514954,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614738.382, "dur": 11.472, + "args": { + "External id": 514955,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145614756.413, "dur": 0.788, + "args": { + "External id": 514956,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145614760.937, "dur": 12.284, + "args": { + "External id": 514957,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145614877.883, "dur": 3156.051, + "args": { + "External id": 514958,"Record function id": 0, "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274145614899.578, "dur": 1207.450, + "args": { + "External id": 514959,"Record function id": 0, "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274145614916.611, "dur": 386.440, + "args": { + "External id": 514960,"Record function id": 0, "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615003.686, "dur": 4.556, + "args": { + "External id": 514961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615012.053, "dur": 0.939, + "args": { + "External id": 514962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615015.126, "dur": 0.893, + "args": { + "External id": 514963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615018.052, "dur": 3.109, + "args": { + "External id": 514964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615022.844, "dur": 0.775, + "args": { + "External id": 514965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615025.532, "dur": 0.813, + "args": { + "External id": 514966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615030.543, "dur": 0.846, + "args": { + "External id": 514967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615033.243, "dur": 1.323, + "args": { + "External id": 514968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615036.071, "dur": 0.879, + "args": { + "External id": 514969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145615038.496, "dur": 0.913, + "args": { + "External id": 514970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145615097.091, "dur": 170.112, + "args": { + "External id": 514971,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145615116.498, "dur": 145.532, + "args": { + "External id": 514972,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145615137.053, "dur": 15.584, + "args": { + "External id": 514973,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145615156.933, "dur": 75.401, + "args": { + "External id": 514974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145615159.691, "dur": 72.221, + "args": { + "External id": 514975,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615166.533, "dur": 8.387, + "args": { + "External id": 514976,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145615176.731, "dur": 54.453, + "args": { + "External id": 514977,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338711, "tid": 2379440, + "ts": 6274145615387.152, "dur": 709.448, + "args": { + "External id": 514978,"Record function id": 0, "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274145615405.594, "dur": 636.890, + "args": { + "External id": 514979,"Record function id": 0, "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145615462.237, "dur": 5.541, + "args": { + "External id": 514980,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145615484.712, "dur": 31.549, + "args": { + "External id": 514981,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615489.862, "dur": 2.124, + "args": { + "External id": 514982,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615493.827, "dur": 1.931, + "args": { + "External id": 514983,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615497.198, "dur": 0.725, + "args": { + "External id": 514984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615498.934, "dur": 0.531, + "args": { + "External id": 514985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615501.860, "dur": 0.507, + "args": { + "External id": 514986,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615503.729, "dur": 0.380, + "args": { + "External id": 514987,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615505.529, "dur": 2.928, + "args": { + "External id": 514988,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615510.472, "dur": 0.546, + "args": { + "External id": 514989,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615512.249, "dur": 0.456, + "args": { + "External id": 514990,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145615526.583, "dur": 46.563, + "args": { + "External id": 514991,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145615639.490, "dur": 144.758, + "args": { + "External id": 514992,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145615653.688, "dur": 6.560, + "args": { + "External id": 514993,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145615666.623, "dur": 13.931, + "args": { + "External id": 514994,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145615671.890, "dur": 8.024, + "args": { + "External id": 514995,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615676.704, "dur": 1.027, + "args": { + "External id": 514996,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145615691.518, "dur": 33.559, + "args": { + "External id": 514997,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615694.142, "dur": 0.435, + "args": { + "External id": 514998,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615698.255, "dur": 0.899, + "args": { + "External id": 514999,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615701.130, "dur": 0.557, + "args": { + "External id": 515000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615703.538, "dur": 4.338, + "args": { + "External id": 515001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615709.554, "dur": 0.450, + "args": { + "External id": 515002,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615711.958, "dur": 0.545, + "args": { + "External id": 515003,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615715.797, "dur": 0.435, + "args": { + "External id": 515004,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615718.077, "dur": 0.598, + "args": { + "External id": 515005,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145615720.218, "dur": 0.355, + "args": { + "External id": 515006,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145615740.965, "dur": 35.416, + "args": { + "External id": 515007,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145615835.312, "dur": 129.836, + "args": { + "External id": 515008,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145615867.008, "dur": 94.502, + "args": { + "External id": 515009,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8128, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145615877.546, "dur": 79.275, + "args": { + "External id": 515010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145615984.392, "dur": 2.059, + "args": { + "External id": 515011,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8130, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145616116.402, "dur": 1893.203, + "args": { + "External id": 515012,"Sequence number": 5516283, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8131 + } + }, + { + "ph": "f", "id": 404, "pid": 2338711, "tid": 2379440, "ts": 6274145616116.402, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616240.989, "dur": 120.804, + "args": { + "External id": 515013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145616416.784, "dur": 40.233, + "args": { + "External id": 515014,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616475.322, "dur": 49.925, + "args": { + "External id": 515015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616536.444, "dur": 31.678, + "args": { + "External id": 515016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616576.634, "dur": 61.757, + "args": { + "External id": 515017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616653.065, "dur": 40.369, + "args": { + "External id": 515018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616701.870, "dur": 35.856, + "args": { + "External id": 515019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145616769.882, "dur": 25.289, + "args": { + "External id": 515020,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145616817.938, "dur": 30.591, + "args": { + "External id": 515021,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145616870.681, "dur": 18.884, + "args": { + "External id": 515022,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145616904.421, "dur": 15.117, + "args": { + "External id": 515023,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616926.801, "dur": 37.692, + "args": { + "External id": 515024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145616967.574, "dur": 33.886, + "args": { + "External id": 515025,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145617035.539, "dur": 310.700, + "args": { + "External id": 515026,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145617161.583, "dur": 8.186, + "args": { + "External id": 515027,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145617172.249, "dur": 3.402, + "args": { + "External id": 515028,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145617176.823, "dur": 2.557, + "args": { + "External id": 515029,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145617180.269, "dur": 3.508, + "args": { + "External id": 515030,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145617237.447, "dur": 5.438, + "args": { + "External id": 515031,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145617239.543, "dur": 3.100, + "args": { + "External id": 515032,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145617245.158, "dur": 35.176, + "args": { + "External id": 515033,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145617250.865, "dur": 2.167, + "args": { + "External id": 515034,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145617281.933, "dur": 1.196, + "args": { + "External id": 515035,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145617282.460, "dur": 0.581, + "args": { + "External id": 515036,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145617284.014, "dur": 17.711, + "args": { + "External id": 515037,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145617286.193, "dur": 2.602, + "args": { + "External id": 515038,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145617383.617, "dur": 27.333, + "args": { + "External id": 515039,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145617430.398, "dur": 16.799, + "args": { + "External id": 515040,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145617455.507, "dur": 46.469, + "args": { + "External id": 515041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145617508.839, "dur": 56.877, + "args": { + "External id": 515042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145617583.131, "dur": 56.319, + "args": { + "External id": 515043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145617653.473, "dur": 39.493, + "args": { + "External id": 515044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145617700.596, "dur": 28.876, + "args": { + "External id": 515045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145617738.989, "dur": 31.844, + "args": { + "External id": 515046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145617800.166, "dur": 25.309, + "args": { + "External id": 515047,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145617843.452, "dur": 26.283, + "args": { + "External id": 515048,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145617886.441, "dur": 16.522, + "args": { + "External id": 515049,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145617919.433, "dur": 13.650, + "args": { + "External id": 515050,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145617952.642, "dur": 16.040, + "args": { + "External id": 515051,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618095.948, "dur": 20.085, + "args": { + "External id": 515052,"Record function id": 0, "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618100.768, "dur": 13.799, + "args": { + "External id": 515053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618106.017, "dur": 6.991, + "args": { + "External id": 515054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618107.777, "dur": 4.964, + "args": { + "External id": 515055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618122.135, "dur": 4.562, + "args": { + "External id": 515056,"Record function id": 0, "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618123.363, "dur": 2.884, + "args": { + "External id": 515057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618124.204, "dur": 1.549, + "args": { + "External id": 515058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618125.020, "dur": 0.635, + "args": { + "External id": 515059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618130.199, "dur": 4.796, + "args": { + "External id": 515060,"Record function id": 0, "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618131.395, "dur": 3.146, + "args": { + "External id": 515061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618132.046, "dur": 1.991, + "args": { + "External id": 515062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618132.971, "dur": 0.959, + "args": { + "External id": 515063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618138.155, "dur": 4.720, + "args": { + "External id": 515064,"Record function id": 0, "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618139.757, "dur": 2.701, + "args": { + "External id": 515065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618140.404, "dur": 1.579, + "args": { + "External id": 515066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618141.142, "dur": 0.768, + "args": { + "External id": 515067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618145.914, "dur": 3.759, + "args": { + "External id": 515068,"Record function id": 0, "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618147.264, "dur": 1.957, + "args": { + "External id": 515069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618147.808, "dur": 0.916, + "args": { + "External id": 515070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618148.073, "dur": 0.565, + "args": { + "External id": 515071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618152.671, "dur": 30.618, + "args": { + "External id": 515072,"Record function id": 0, "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618178.077, "dur": 4.763, + "args": { + "External id": 515073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618178.831, "dur": 3.533, + "args": { + "External id": 515074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618181.582, "dur": 0.664, + "args": { + "External id": 515075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618186.680, "dur": 5.750, + "args": { + "External id": 515076,"Record function id": 0, "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618188.063, "dur": 3.936, + "args": { + "External id": 515077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618188.558, "dur": 3.022, + "args": { + "External id": 515078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618188.831, "dur": 2.667, + "args": { + "External id": 515079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618195.504, "dur": 3.828, + "args": { + "External id": 515080,"Record function id": 0, "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618196.663, "dur": 2.258, + "args": { + "External id": 515081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618197.308, "dur": 1.165, + "args": { + "External id": 515082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618197.567, "dur": 0.801, + "args": { + "External id": 515083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618202.333, "dur": 3.436, + "args": { + "External id": 515084,"Record function id": 0, "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145618203.359, "dur": 1.991, + "args": { + "External id": 515085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618203.871, "dur": 1.016, + "args": { + "External id": 515086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145618204.212, "dur": 0.589, + "args": { + "External id": 515087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145618210.097, "dur": 68649.047, + "args": { + "External id": 515088,"Record function id": 0, "Sequence number": 5516282, "Fwd thread id": 1, "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145618211.555, "dur": 68637.961, + "args": { + "External id": 515089,"Sequence number": 5516282, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8208 + } + }, + { + "ph": "f", "id": 405, "pid": 2338711, "tid": 2379440, "ts": 6274145618211.555, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274145618248.586, "dur": 45.879, + "args": { + "External id": 515090,"Record function id": 0, "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274145618302.799, "dur": 70.823, + "args": { + "External id": 515091,"Record function id": 0, "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338711, "tid": 2379440, + "ts": 6274145618379.797, "dur": 68461.183, + "args": { + "External id": 515092,"Record function id": 0, "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145618478.644, "dur": 8.796, + "args": { + "External id": 515093,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145618497.400, "dur": 4.952, + "args": { + "External id": 515094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145618520.529, "dur": 67303.093, + "args": { + "External id": 515095,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145618538.249, "dur": 67271.134, + "args": { + "External id": 515096,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145618678.321, "dur": 27.320, + "args": { + "External id": 515097,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145618727.843, "dur": 67031.294, + "args": { + "External id": 515098,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145618731.282, "dur": 67026.584, + "args": { + "External id": 515099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145618736.395, "dur": 17.120, + "args": { + "External id": 515100,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145618755.649, "dur": 66996.137, + "args": { + "External id": 515101,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145685944.010, "dur": 13.038, + "args": { + "External id": 515102,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145685947.926, "dur": 8.677, + "args": { + "External id": 515103,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145685989.789, "dur": 404.572, + "args": { + "External id": 515104,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145686023.134, "dur": 365.448, + "args": { + "External id": 515105,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8224, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145686037.538, "dur": 345.449, + "args": { + "External id": 515106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145686421.670, "dur": 2.741, + "args": { + "External id": 515107,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8226, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686488.835, "dur": 7.335, + "args": { + "External id": 515108,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686509.778, "dur": 35.608, + "args": { + "External id": 515109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686556.550, "dur": 3.455, + "args": { + "External id": 515110,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686565.337, "dur": 12.699, + "args": { + "External id": 515111,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686584.547, "dur": 0.995, + "args": { + "External id": 515112,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686613.636, "dur": 18.289, + "args": { + "External id": 515113,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686640.797, "dur": 1.588, + "args": { + "External id": 515114,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686649.025, "dur": 14.603, + "args": { + "External id": 515115,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686669.814, "dur": 1.352, + "args": { + "External id": 515116,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686676.613, "dur": 11.987, + "args": { + "External id": 515117,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686693.663, "dur": 1.834, + "args": { + "External id": 515118,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686699.644, "dur": 10.282, + "args": { + "External id": 515119,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686716.973, "dur": 0.742, + "args": { + "External id": 515120,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686721.244, "dur": 10.834, + "args": { + "External id": 515121,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686736.347, "dur": 0.977, + "args": { + "External id": 515122,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686741.440, "dur": 10.760, + "args": { + "External id": 515123,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145686755.884, "dur": 0.915, + "args": { + "External id": 515124,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145686762.215, "dur": 12.836, + "args": { + "External id": 515125,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145686876.499, "dur": 3088.839, + "args": { + "External id": 515126,"Record function id": 0, "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274145686898.316, "dur": 1146.649, + "args": { + "External id": 515127,"Record function id": 0, "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274145686915.042, "dur": 373.756, + "args": { + "External id": 515128,"Record function id": 0, "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687000.049, "dur": 6.450, + "args": { + "External id": 515129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687009.908, "dur": 0.908, + "args": { + "External id": 515130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687013.229, "dur": 0.902, + "args": { + "External id": 515131,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687016.113, "dur": 0.960, + "args": { + "External id": 515132,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687018.627, "dur": 0.785, + "args": { + "External id": 515133,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687021.325, "dur": 0.996, + "args": { + "External id": 515134,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687023.722, "dur": 0.925, + "args": { + "External id": 515135,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687028.383, "dur": 1.169, + "args": { + "External id": 515136,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687031.279, "dur": 3.270, + "args": { + "External id": 515137,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145687035.936, "dur": 0.943, + "args": { + "External id": 515138,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145687054.753, "dur": 201.059, + "args": { + "External id": 515139,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145687108.978, "dur": 141.891, + "args": { + "External id": 515140,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145687130.785, "dur": 14.651, + "args": { + "External id": 515141,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145687149.433, "dur": 73.579, + "args": { + "External id": 515142,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145687151.920, "dur": 70.636, + "args": { + "External id": 515143,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687158.404, "dur": 6.526, + "args": { + "External id": 515144,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145687166.874, "dur": 54.831, + "args": { + "External id": 515145,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338711, "tid": 2379440, + "ts": 6274145687376.939, "dur": 660.273, + "args": { + "External id": 515146,"Record function id": 0, "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274145687395.992, "dur": 627.020, + "args": { + "External id": 515147,"Record function id": 0, "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145687452.514, "dur": 5.065, + "args": { + "External id": 515148,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145687478.149, "dur": 29.784, + "args": { + "External id": 515149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687482.975, "dur": 1.684, + "args": { + "External id": 515150,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687486.624, "dur": 1.665, + "args": { + "External id": 515151,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687489.445, "dur": 0.480, + "args": { + "External id": 515152,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687491.100, "dur": 2.687, + "args": { + "External id": 515153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687496.459, "dur": 0.515, + "args": { + "External id": 515154,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687498.607, "dur": 0.496, + "args": { + "External id": 515155,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687500.073, "dur": 0.366, + "args": { + "External id": 515156,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687502.525, "dur": 0.354, + "args": { + "External id": 515157,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687504.198, "dur": 0.379, + "args": { + "External id": 515158,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145687517.826, "dur": 43.426, + "args": { + "External id": 515159,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145687612.668, "dur": 150.395, + "args": { + "External id": 515160,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145687626.913, "dur": 6.558, + "args": { + "External id": 515161,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145687640.407, "dur": 12.958, + "args": { + "External id": 515162,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145687645.750, "dur": 7.069, + "args": { + "External id": 515163,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687649.780, "dur": 0.876, + "args": { + "External id": 515164,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145687669.559, "dur": 34.518, + "args": { + "External id": 515165,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687672.256, "dur": 2.860, + "args": { + "External id": 515166,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687678.121, "dur": 0.390, + "args": { + "External id": 515167,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687680.276, "dur": 0.472, + "args": { + "External id": 515168,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687682.235, "dur": 1.575, + "args": { + "External id": 515169,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687685.591, "dur": 0.724, + "args": { + "External id": 515170,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687688.081, "dur": 0.579, + "args": { + "External id": 515171,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687691.987, "dur": 0.610, + "args": { + "External id": 515172,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687694.187, "dur": 0.366, + "args": { + "External id": 515173,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145687696.574, "dur": 3.214, + "args": { + "External id": 515174,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145687718.969, "dur": 35.754, + "args": { + "External id": 515175,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145687814.662, "dur": 130.017, + "args": { + "External id": 515176,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145687848.488, "dur": 92.625, + "args": { + "External id": 515177,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8296, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145687858.758, "dur": 77.985, + "args": { + "External id": 515178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145687963.714, "dur": 1.928, + "args": { + "External id": 515179,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8298, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145688052.538, "dur": 1890.694, + "args": { + "External id": 515180,"Sequence number": 5516281, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8299 + } + }, + { + "ph": "f", "id": 406, "pid": 2338711, "tid": 2379440, "ts": 6274145688052.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688207.419, "dur": 113.021, + "args": { + "External id": 515181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145688367.038, "dur": 39.615, + "args": { + "External id": 515182,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688426.741, "dur": 51.943, + "args": { + "External id": 515183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688488.794, "dur": 31.954, + "args": { + "External id": 515184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688526.900, "dur": 34.818, + "args": { + "External id": 515185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688568.589, "dur": 48.806, + "args": { + "External id": 515186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688634.399, "dur": 45.295, + "args": { + "External id": 515187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145688712.144, "dur": 28.732, + "args": { + "External id": 515188,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145688760.840, "dur": 31.195, + "args": { + "External id": 515189,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145688816.130, "dur": 19.966, + "args": { + "External id": 515190,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145688850.959, "dur": 16.963, + "args": { + "External id": 515191,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688877.628, "dur": 38.538, + "args": { + "External id": 515192,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145688920.224, "dur": 35.041, + "args": { + "External id": 515193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145688987.026, "dur": 301.919, + "args": { + "External id": 515194,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145689110.678, "dur": 7.948, + "args": { + "External id": 515195,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145689121.059, "dur": 2.652, + "args": { + "External id": 515196,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145689124.795, "dur": 2.063, + "args": { + "External id": 515197,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145689127.862, "dur": 4.171, + "args": { + "External id": 515198,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145689181.940, "dur": 7.733, + "args": { + "External id": 515199,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145689186.540, "dur": 2.928, + "args": { + "External id": 515200,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145689191.773, "dur": 32.805, + "args": { + "External id": 515201,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145689198.366, "dur": 1.869, + "args": { + "External id": 515202,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145689226.082, "dur": 1.960, + "args": { + "External id": 515203,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145689227.357, "dur": 0.616, + "args": { + "External id": 515204,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145689228.937, "dur": 13.937, + "args": { + "External id": 515205,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145689230.945, "dur": 0.390, + "args": { + "External id": 515206,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145689328.513, "dur": 28.180, + "args": { + "External id": 515207,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145689382.327, "dur": 17.630, + "args": { + "External id": 515208,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145689422.227, "dur": 51.947, + "args": { + "External id": 515209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145689481.566, "dur": 41.835, + "args": { + "External id": 515210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145689535.196, "dur": 20.645, + "args": { + "External id": 515211,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145689563.093, "dur": 54.805, + "args": { + "External id": 515212,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145689630.769, "dur": 39.933, + "args": { + "External id": 515213,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145689679.144, "dur": 32.081, + "args": { + "External id": 515214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145689740.744, "dur": 27.074, + "args": { + "External id": 515215,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145689790.054, "dur": 24.739, + "args": { + "External id": 515216,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145689832.635, "dur": 17.561, + "args": { + "External id": 515217,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145689865.432, "dur": 13.249, + "args": { + "External id": 515218,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145689893.612, "dur": 15.384, + "args": { + "External id": 515219,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145689989.319, "dur": 16.553, + "args": { + "External id": 515220,"Record function id": 0, "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145689992.991, "dur": 11.857, + "args": { + "External id": 515221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145689997.299, "dur": 6.429, + "args": { + "External id": 515222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145689999.009, "dur": 4.599, + "args": { + "External id": 515223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690009.923, "dur": 5.499, + "args": { + "External id": 515224,"Record function id": 0, "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690011.534, "dur": 3.419, + "args": { + "External id": 515225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690012.436, "dur": 2.041, + "args": { + "External id": 515226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690013.462, "dur": 0.937, + "args": { + "External id": 515227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690018.661, "dur": 4.136, + "args": { + "External id": 515228,"Record function id": 0, "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690019.993, "dur": 2.395, + "args": { + "External id": 515229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690020.441, "dur": 1.421, + "args": { + "External id": 515230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690020.999, "dur": 0.737, + "args": { + "External id": 515231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690026.127, "dur": 4.187, + "args": { + "External id": 515232,"Record function id": 0, "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690027.492, "dur": 2.418, + "args": { + "External id": 515233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690028.076, "dur": 1.424, + "args": { + "External id": 515234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690028.709, "dur": 0.730, + "args": { + "External id": 515235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690033.350, "dur": 3.467, + "args": { + "External id": 515236,"Record function id": 0, "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690034.354, "dur": 2.065, + "args": { + "External id": 515237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690034.857, "dur": 1.046, + "args": { + "External id": 515238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690035.139, "dur": 0.632, + "args": { + "External id": 515239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690039.896, "dur": 6.125, + "args": { + "External id": 515240,"Record function id": 0, "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690041.210, "dur": 4.383, + "args": { + "External id": 515241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690041.645, "dur": 3.514, + "args": { + "External id": 515242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690042.179, "dur": 2.906, + "args": { + "External id": 515243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690049.147, "dur": 3.563, + "args": { + "External id": 515244,"Record function id": 0, "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690050.179, "dur": 2.135, + "args": { + "External id": 515245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690050.760, "dur": 1.119, + "args": { + "External id": 515246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690051.044, "dur": 0.769, + "args": { + "External id": 515247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690092.240, "dur": 10.562, + "args": { + "External id": 515248,"Record function id": 0, "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690095.368, "dur": 6.722, + "args": { + "External id": 515249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690096.623, "dur": 4.556, + "args": { + "External id": 515250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690099.374, "dur": 1.582, + "args": { + "External id": 515251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690107.472, "dur": 4.342, + "args": { + "External id": 515252,"Record function id": 0, "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145690109.038, "dur": 2.325, + "args": { + "External id": 515253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690109.899, "dur": 0.913, + "args": { + "External id": 515254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145690110.213, "dur": 0.521, + "args": { + "External id": 515255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145690115.585, "dur": 76991.935, + "args": { + "External id": 515256,"Record function id": 0, "Sequence number": 5516280, "Fwd thread id": 1, "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145690117.100, "dur": 76979.411, + "args": { + "External id": 515257,"Sequence number": 5516280, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8376 + } + }, + { + "ph": "f", "id": 407, "pid": 2338711, "tid": 2379440, "ts": 6274145690117.100, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274145690152.810, "dur": 44.298, + "args": { + "External id": 515258,"Record function id": 0, "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274145690205.516, "dur": 67.263, + "args": { + "External id": 515259,"Record function id": 0, "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338711, "tid": 2379440, + "ts": 6274145690278.860, "dur": 76807.041, + "args": { + "External id": 515260,"Record function id": 0, "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145690376.046, "dur": 7.849, + "args": { + "External id": 515261,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145690393.956, "dur": 5.243, + "args": { + "External id": 515262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145690416.566, "dur": 75547.104, + "args": { + "External id": 515263,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145690433.679, "dur": 75515.543, + "args": { + "External id": 515264,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145690539.302, "dur": 18.358, + "args": { + "External id": 515265,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145690577.797, "dur": 75320.797, + "args": { + "External id": 515266,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145690580.890, "dur": 75316.339, + "args": { + "External id": 515267,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145690585.916, "dur": 37.852, + "args": { + "External id": 515268,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145690628.326, "dur": 75262.805, + "args": { + "External id": 515269,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145766126.861, "dur": 14.879, + "args": { + "External id": 515270,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145766130.954, "dur": 10.184, + "args": { + "External id": 515271,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766175.250, "dur": 431.533, + "args": { + "External id": 515272,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145766210.354, "dur": 372.608, + "args": { + "External id": 515273,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8392, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145766224.044, "dur": 353.431, + "args": { + "External id": 515274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145766638.647, "dur": 4.000, + "args": { + "External id": 515275,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8394, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766718.954, "dur": 11.114, + "args": { + "External id": 515276,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766743.297, "dur": 45.488, + "args": { + "External id": 515277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766799.540, "dur": 4.019, + "args": { + "External id": 515278,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766809.311, "dur": 13.351, + "args": { + "External id": 515279,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766828.163, "dur": 1.036, + "args": { + "External id": 515280,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766834.215, "dur": 13.484, + "args": { + "External id": 515281,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766852.649, "dur": 0.947, + "args": { + "External id": 515282,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766858.041, "dur": 12.041, + "args": { + "External id": 515283,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766876.657, "dur": 1.005, + "args": { + "External id": 515284,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766881.922, "dur": 13.403, + "args": { + "External id": 515285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766899.275, "dur": 1.211, + "args": { + "External id": 515286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766904.681, "dur": 11.716, + "args": { + "External id": 515287,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766920.528, "dur": 1.032, + "args": { + "External id": 515288,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766925.503, "dur": 13.093, + "args": { + "External id": 515289,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766942.342, "dur": 0.690, + "args": { + "External id": 515290,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766949.231, "dur": 12.066, + "args": { + "External id": 515291,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145766965.494, "dur": 0.709, + "args": { + "External id": 515292,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145766970.529, "dur": 12.528, + "args": { + "External id": 515293,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145767126.348, "dur": 3128.108, + "args": { + "External id": 515294,"Record function id": 0, "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274145767151.347, "dur": 1147.612, + "args": { + "External id": 515295,"Record function id": 0, "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274145767170.003, "dur": 330.784, + "args": { + "External id": 515296,"Record function id": 0, "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767255.915, "dur": 7.149, + "args": { + "External id": 515297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767267.143, "dur": 0.759, + "args": { + "External id": 515298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767270.209, "dur": 0.726, + "args": { + "External id": 515299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767272.797, "dur": 0.834, + "args": { + "External id": 515300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767275.155, "dur": 0.702, + "args": { + "External id": 515301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767279.808, "dur": 0.957, + "args": { + "External id": 515302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767282.518, "dur": 0.919, + "args": { + "External id": 515303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767284.774, "dur": 1.130, + "args": { + "External id": 515304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767287.979, "dur": 2.913, + "args": { + "External id": 515305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145767294.672, "dur": 0.872, + "args": { + "External id": 515306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145767313.568, "dur": 154.261, + "args": { + "External id": 515307,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145767331.609, "dur": 131.535, + "args": { + "External id": 515308,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145767348.698, "dur": 14.508, + "args": { + "External id": 515309,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145767366.903, "dur": 69.805, + "args": { + "External id": 515310,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145767370.047, "dur": 66.256, + "args": { + "External id": 515311,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767376.549, "dur": 5.804, + "args": { + "External id": 515312,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145767384.140, "dur": 51.471, + "args": { + "External id": 515313,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338711, "tid": 2379440, + "ts": 6274145767584.688, "dur": 706.069, + "args": { + "External id": 515314,"Record function id": 0, "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274145767630.951, "dur": 646.183, + "args": { + "External id": 515315,"Record function id": 0, "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145767707.253, "dur": 8.818, + "args": { + "External id": 515316,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145767737.023, "dur": 32.134, + "args": { + "External id": 515317,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767743.436, "dur": 2.377, + "args": { + "External id": 515318,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767748.917, "dur": 0.545, + "args": { + "External id": 515319,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767751.076, "dur": 0.741, + "args": { + "External id": 515320,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767753.871, "dur": 2.149, + "args": { + "External id": 515321,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767757.523, "dur": 0.333, + "args": { + "External id": 515322,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767759.282, "dur": 0.302, + "args": { + "External id": 515323,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767761.443, "dur": 0.463, + "args": { + "External id": 515324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767763.723, "dur": 0.403, + "args": { + "External id": 515325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767765.343, "dur": 0.506, + "args": { + "External id": 515326,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145767780.087, "dur": 47.419, + "args": { + "External id": 515327,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145767860.217, "dur": 115.897, + "args": { + "External id": 515328,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145767871.103, "dur": 3.834, + "args": { + "External id": 515329,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145767880.316, "dur": 11.148, + "args": { + "External id": 515330,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145767884.977, "dur": 6.076, + "args": { + "External id": 515331,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767888.991, "dur": 0.667, + "args": { + "External id": 515332,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145767898.933, "dur": 28.634, + "args": { + "External id": 515333,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767900.649, "dur": 2.907, + "args": { + "External id": 515334,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767905.528, "dur": 0.572, + "args": { + "External id": 515335,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767907.278, "dur": 0.773, + "args": { + "External id": 515336,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767910.067, "dur": 0.660, + "args": { + "External id": 515337,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767912.644, "dur": 0.369, + "args": { + "External id": 515338,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767914.611, "dur": 0.626, + "args": { + "External id": 515339,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767916.961, "dur": 0.559, + "args": { + "External id": 515340,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767919.217, "dur": 0.436, + "args": { + "External id": 515341,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145767921.284, "dur": 2.783, + "args": { + "External id": 515342,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145767937.909, "dur": 30.791, + "args": { + "External id": 515343,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145768024.178, "dur": 167.415, + "args": { + "External id": 515344,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145768090.503, "dur": 96.444, + "args": { + "External id": 515345,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8464, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145768103.333, "dur": 79.270, + "args": { + "External id": 515346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145768212.034, "dur": 2.215, + "args": { + "External id": 515347,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8466, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145768306.545, "dur": 1927.638, + "args": { + "External id": 515348,"Sequence number": 5516279, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8467 + } + }, + { + "ph": "f", "id": 408, "pid": 2338711, "tid": 2379440, "ts": 6274145768306.545, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145768426.458, "dur": 109.656, + "args": { + "External id": 515349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145768575.944, "dur": 71.603, + "args": { + "External id": 515350,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145768677.948, "dur": 67.143, + "args": { + "External id": 515351,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145768756.746, "dur": 34.982, + "args": { + "External id": 515352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145768798.578, "dur": 33.986, + "args": { + "External id": 515353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145768839.099, "dur": 29.072, + "args": { + "External id": 515354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145768878.292, "dur": 30.015, + "args": { + "External id": 515355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145768938.208, "dur": 24.391, + "args": { + "External id": 515356,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145768984.533, "dur": 29.333, + "args": { + "External id": 515357,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145769035.753, "dur": 58.273, + "args": { + "External id": 515358,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145769116.186, "dur": 18.389, + "args": { + "External id": 515359,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769146.742, "dur": 55.701, + "args": { + "External id": 515360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769206.859, "dur": 38.326, + "args": { + "External id": 515361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145769276.808, "dur": 257.022, + "args": { + "External id": 515362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145769363.299, "dur": 7.171, + "args": { + "External id": 515363,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145769372.756, "dur": 3.657, + "args": { + "External id": 515364,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145769377.779, "dur": 2.695, + "args": { + "External id": 515365,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145769382.040, "dur": 4.446, + "args": { + "External id": 515366,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145769431.756, "dur": 5.248, + "args": { + "External id": 515367,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145769433.867, "dur": 2.949, + "args": { + "External id": 515368,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145769438.994, "dur": 31.904, + "args": { + "External id": 515369,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145769444.509, "dur": 1.754, + "args": { + "External id": 515370,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145769472.491, "dur": 2.018, + "args": { + "External id": 515371,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145769473.529, "dur": 0.909, + "args": { + "External id": 515372,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145769476.050, "dur": 15.259, + "args": { + "External id": 515373,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145769478.847, "dur": 0.544, + "args": { + "External id": 515374,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145769567.212, "dur": 49.130, + "args": { + "External id": 515375,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145769667.579, "dur": 32.116, + "args": { + "External id": 515376,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769713.211, "dur": 57.558, + "args": { + "External id": 515377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769778.295, "dur": 47.271, + "args": { + "External id": 515378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769834.725, "dur": 21.208, + "args": { + "External id": 515379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769864.400, "dur": 32.313, + "args": { + "External id": 515380,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769904.699, "dur": 29.696, + "args": { + "External id": 515381,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145769942.180, "dur": 32.500, + "args": { + "External id": 515382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145769993.675, "dur": 25.659, + "args": { + "External id": 515383,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145770035.247, "dur": 61.933, + "args": { + "External id": 515384,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145770114.698, "dur": 21.511, + "args": { + "External id": 515385,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145770151.827, "dur": 15.913, + "args": { + "External id": 515386,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145770180.729, "dur": 18.717, + "args": { + "External id": 515387,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770278.105, "dur": 16.396, + "args": { + "External id": 515388,"Record function id": 0, "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770281.762, "dur": 11.794, + "args": { + "External id": 515389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770286.171, "dur": 6.516, + "args": { + "External id": 515390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770288.171, "dur": 4.399, + "args": { + "External id": 515391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770298.309, "dur": 5.862, + "args": { + "External id": 515392,"Record function id": 0, "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770299.858, "dur": 3.888, + "args": { + "External id": 515393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770301.047, "dur": 2.270, + "args": { + "External id": 515394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770302.214, "dur": 1.003, + "args": { + "External id": 515395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770307.360, "dur": 4.298, + "args": { + "External id": 515396,"Record function id": 0, "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770308.801, "dur": 2.444, + "args": { + "External id": 515397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770309.303, "dur": 1.481, + "args": { + "External id": 515398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770309.679, "dur": 0.991, + "args": { + "External id": 515399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770314.771, "dur": 3.383, + "args": { + "External id": 515400,"Record function id": 0, "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770315.765, "dur": 1.997, + "args": { + "External id": 515401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770316.220, "dur": 1.160, + "args": { + "External id": 515402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770316.669, "dur": 0.641, + "args": { + "External id": 515403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770321.205, "dur": 4.474, + "args": { + "External id": 515404,"Record function id": 0, "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770322.501, "dur": 2.782, + "args": { + "External id": 515405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770323.162, "dur": 1.428, + "args": { + "External id": 515406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770323.823, "dur": 0.692, + "args": { + "External id": 515407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770328.777, "dur": 6.951, + "args": { + "External id": 515408,"Record function id": 0, "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770330.100, "dur": 5.173, + "args": { + "External id": 515409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770330.650, "dur": 4.217, + "args": { + "External id": 515410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770331.245, "dur": 3.507, + "args": { + "External id": 515411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770338.959, "dur": 3.951, + "args": { + "External id": 515412,"Record function id": 0, "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770340.331, "dur": 2.140, + "args": { + "External id": 515413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770340.770, "dur": 1.286, + "args": { + "External id": 515414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770341.100, "dur": 0.870, + "args": { + "External id": 515415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770345.948, "dur": 16.172, + "args": { + "External id": 515416,"Record function id": 0, "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770359.517, "dur": 2.156, + "args": { + "External id": 515417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770360.091, "dur": 1.057, + "args": { + "External id": 515418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770360.370, "dur": 0.708, + "args": { + "External id": 515419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770365.479, "dur": 5.311, + "args": { + "External id": 515420,"Record function id": 0, "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145770366.363, "dur": 4.013, + "args": { + "External id": 515421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770366.857, "dur": 3.033, + "args": { + "External id": 515422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145770369.207, "dur": 0.560, + "args": { + "External id": 515423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145770374.710, "dur": 66511.795, + "args": { + "External id": 515424,"Record function id": 0, "Sequence number": 5516278, "Fwd thread id": 1, "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145770382.439, "dur": 66493.473, + "args": { + "External id": 515425,"Sequence number": 5516278, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8544 + } + }, + { + "ph": "f", "id": 409, "pid": 2338711, "tid": 2379440, "ts": 6274145770382.439, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274145770417.550, "dur": 42.137, + "args": { + "External id": 515426,"Record function id": 0, "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274145770467.909, "dur": 65.290, + "args": { + "External id": 515427,"Record function id": 0, "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338711, "tid": 2379440, + "ts": 6274145770539.372, "dur": 66328.078, + "args": { + "External id": 515428,"Record function id": 0, "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145770664.906, "dur": 10.648, + "args": { + "External id": 515429,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145770688.500, "dur": 6.540, + "args": { + "External id": 515430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145770713.522, "dur": 65043.082, + "args": { + "External id": 515431,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145770731.556, "dur": 65009.243, + "args": { + "External id": 515432,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145770830.287, "dur": 18.967, + "args": { + "External id": 515433,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145770869.695, "dur": 64813.040, + "args": { + "External id": 515434,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145770872.832, "dur": 64808.457, + "args": { + "External id": 515435,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145770878.068, "dur": 9.202, + "args": { + "External id": 515436,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145770889.308, "dur": 64785.838, + "args": { + "External id": 515437,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145835878.869, "dur": 13.952, + "args": { + "External id": 515438,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145835882.827, "dur": 9.567, + "args": { + "External id": 515439,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145835927.839, "dur": 447.207, + "args": { + "External id": 515440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145835961.180, "dur": 408.034, + "args": { + "External id": 515441,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8560, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145835976.252, "dur": 385.241, + "args": { + "External id": 515442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145836404.067, "dur": 2.997, + "args": { + "External id": 515443,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8562, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836472.527, "dur": 7.920, + "args": { + "External id": 515444,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836493.607, "dur": 37.696, + "args": { + "External id": 515445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836542.903, "dur": 4.083, + "args": { + "External id": 515446,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836552.845, "dur": 14.392, + "args": { + "External id": 515447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836573.348, "dur": 1.048, + "args": { + "External id": 515448,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836579.792, "dur": 36.636, + "args": { + "External id": 515449,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836627.253, "dur": 1.912, + "args": { + "External id": 515450,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836635.609, "dur": 17.906, + "args": { + "External id": 515451,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836660.708, "dur": 1.151, + "args": { + "External id": 515452,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836667.789, "dur": 17.307, + "args": { + "External id": 515453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836690.707, "dur": 1.738, + "args": { + "External id": 515454,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836697.848, "dur": 16.315, + "args": { + "External id": 515455,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836720.998, "dur": 1.292, + "args": { + "External id": 515456,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836728.540, "dur": 17.217, + "args": { + "External id": 515457,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836751.907, "dur": 1.202, + "args": { + "External id": 515458,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836759.521, "dur": 15.720, + "args": { + "External id": 515459,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145836781.855, "dur": 1.054, + "args": { + "External id": 515460,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145836787.279, "dur": 13.976, + "args": { + "External id": 515461,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145836904.195, "dur": 3259.876, + "args": { + "External id": 515462,"Record function id": 0, "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274145836926.313, "dur": 1216.826, + "args": { + "External id": 515463,"Record function id": 0, "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274145836945.224, "dur": 401.036, + "args": { + "External id": 515464,"Record function id": 0, "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837032.272, "dur": 7.160, + "args": { + "External id": 515465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837043.108, "dur": 1.290, + "args": { + "External id": 515466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837046.990, "dur": 1.070, + "args": { + "External id": 515467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837049.732, "dur": 1.200, + "args": { + "External id": 515468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837099.792, "dur": 1.573, + "args": { + "External id": 515469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837104.835, "dur": 0.852, + "args": { + "External id": 515470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837107.549, "dur": 1.007, + "args": { + "External id": 515471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837112.332, "dur": 2.108, + "args": { + "External id": 515472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837116.059, "dur": 2.813, + "args": { + "External id": 515473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145837120.166, "dur": 0.622, + "args": { + "External id": 515474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145837140.323, "dur": 171.242, + "args": { + "External id": 515475,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145837158.710, "dur": 147.249, + "args": { + "External id": 515476,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145837181.052, "dur": 16.997, + "args": { + "External id": 515477,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145837202.289, "dur": 71.146, + "args": { + "External id": 515478,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145837205.483, "dur": 67.557, + "args": { + "External id": 515479,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837210.161, "dur": 6.559, + "args": { + "External id": 515480,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145837219.183, "dur": 53.225, + "args": { + "External id": 515481,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338711, "tid": 2379440, + "ts": 6274145837435.194, "dur": 699.544, + "args": { + "External id": 515482,"Record function id": 0, "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274145837453.198, "dur": 667.133, + "args": { + "External id": 515483,"Record function id": 0, "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145837512.306, "dur": 5.963, + "args": { + "External id": 515484,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145837534.555, "dur": 30.390, + "args": { + "External id": 515485,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837540.381, "dur": 1.645, + "args": { + "External id": 515486,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837544.230, "dur": 0.505, + "args": { + "External id": 515487,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837546.081, "dur": 0.834, + "args": { + "External id": 515488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837548.888, "dur": 2.917, + "args": { + "External id": 515489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837553.445, "dur": 0.333, + "args": { + "External id": 515490,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837555.043, "dur": 0.598, + "args": { + "External id": 515491,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837557.871, "dur": 0.642, + "args": { + "External id": 515492,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837559.904, "dur": 0.326, + "args": { + "External id": 515493,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837561.297, "dur": 0.628, + "args": { + "External id": 515494,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145837575.807, "dur": 69.196, + "args": { + "External id": 515495,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145837692.668, "dur": 126.403, + "args": { + "External id": 515496,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145837705.609, "dur": 5.414, + "args": { + "External id": 515497,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145837716.791, "dur": 12.438, + "args": { + "External id": 515498,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145837721.957, "dur": 6.827, + "args": { + "External id": 515499,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837726.217, "dur": 0.646, + "args": { + "External id": 515500,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145837737.104, "dur": 27.651, + "args": { + "External id": 515501,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837739.251, "dur": 2.800, + "args": { + "External id": 515502,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837743.628, "dur": 0.558, + "args": { + "External id": 515503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837745.932, "dur": 0.517, + "args": { + "External id": 515504,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837748.243, "dur": 0.750, + "args": { + "External id": 515505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837750.029, "dur": 0.711, + "args": { + "External id": 515506,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837752.610, "dur": 0.600, + "args": { + "External id": 515507,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837754.750, "dur": 0.502, + "args": { + "External id": 515508,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837756.477, "dur": 0.425, + "args": { + "External id": 515509,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145837758.607, "dur": 2.875, + "args": { + "External id": 515510,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145837777.149, "dur": 34.005, + "args": { + "External id": 515511,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145837869.046, "dur": 124.550, + "args": { + "External id": 515512,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145837897.473, "dur": 92.387, + "args": { + "External id": 515513,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8632, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145837908.482, "dur": 77.136, + "args": { + "External id": 515514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145838015.973, "dur": 2.273, + "args": { + "External id": 515515,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8634, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145838152.373, "dur": 1988.918, + "args": { + "External id": 515516,"Sequence number": 5516277, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8635 + } + }, + { + "ph": "f", "id": 410, "pid": 2338711, "tid": 2379440, "ts": 6274145838152.373, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145838271.683, "dur": 116.612, + "args": { + "External id": 515517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145838433.864, "dur": 39.899, + "args": { + "External id": 515518,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145838494.450, "dur": 49.507, + "args": { + "External id": 515519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145838555.126, "dur": 52.359, + "args": { + "External id": 515520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145838621.251, "dur": 51.915, + "args": { + "External id": 515521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145838683.501, "dur": 33.686, + "args": { + "External id": 515522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145838726.960, "dur": 31.317, + "args": { + "External id": 515523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145838787.698, "dur": 29.269, + "args": { + "External id": 515524,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145838838.698, "dur": 32.459, + "args": { + "External id": 515525,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145838892.740, "dur": 20.255, + "args": { + "External id": 515526,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145838928.834, "dur": 15.313, + "args": { + "External id": 515527,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145838955.038, "dur": 41.483, + "args": { + "External id": 515528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145839000.378, "dur": 35.060, + "args": { + "External id": 515529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145839109.230, "dur": 301.423, + "args": { + "External id": 515530,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145839195.463, "dur": 7.872, + "args": { + "External id": 515531,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145839206.193, "dur": 3.038, + "args": { + "External id": 515532,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145839210.844, "dur": 2.832, + "args": { + "External id": 515533,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145839215.262, "dur": 4.410, + "args": { + "External id": 515534,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145839269.583, "dur": 5.973, + "args": { + "External id": 515535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145839272.015, "dur": 3.253, + "args": { + "External id": 515536,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145839277.643, "dur": 35.605, + "args": { + "External id": 515537,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145839283.838, "dur": 1.961, + "args": { + "External id": 515538,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145839315.188, "dur": 2.063, + "args": { + "External id": 515539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145839316.411, "dur": 0.768, + "args": { + "External id": 515540,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145839318.962, "dur": 33.415, + "args": { + "External id": 515541,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145839321.859, "dur": 0.534, + "args": { + "External id": 515542,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145839450.122, "dur": 33.976, + "args": { + "External id": 515543,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145839502.327, "dur": 20.292, + "args": { + "External id": 515544,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145839535.586, "dur": 70.415, + "args": { + "External id": 515545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145839619.528, "dur": 63.804, + "args": { + "External id": 515546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145839696.744, "dur": 33.982, + "args": { + "External id": 515547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145839741.973, "dur": 48.038, + "args": { + "External id": 515548,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145839799.163, "dur": 31.772, + "args": { + "External id": 515549,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145839838.520, "dur": 32.755, + "args": { + "External id": 515550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145839892.573, "dur": 29.461, + "args": { + "External id": 515551,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145839939.588, "dur": 27.151, + "args": { + "External id": 515552,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145839981.821, "dur": 19.826, + "args": { + "External id": 515553,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145840015.495, "dur": 15.587, + "args": { + "External id": 515554,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145840043.486, "dur": 58.221, + "args": { + "External id": 515555,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840189.813, "dur": 17.825, + "args": { + "External id": 515556,"Record function id": 0, "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840193.890, "dur": 12.817, + "args": { + "External id": 515557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840198.774, "dur": 6.935, + "args": { + "External id": 515558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840201.074, "dur": 4.513, + "args": { + "External id": 515559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840211.500, "dur": 5.417, + "args": { + "External id": 515560,"Record function id": 0, "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840212.957, "dur": 3.420, + "args": { + "External id": 515561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840213.790, "dur": 2.101, + "args": { + "External id": 515562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840214.716, "dur": 1.031, + "args": { + "External id": 515563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840220.404, "dur": 4.319, + "args": { + "External id": 515564,"Record function id": 0, "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840221.820, "dur": 2.525, + "args": { + "External id": 515565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840222.510, "dur": 1.405, + "args": { + "External id": 515566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840222.839, "dur": 0.985, + "args": { + "External id": 515567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840227.840, "dur": 3.556, + "args": { + "External id": 515568,"Record function id": 0, "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840228.769, "dur": 2.224, + "args": { + "External id": 515569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840229.381, "dur": 1.219, + "args": { + "External id": 515570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840229.717, "dur": 0.778, + "args": { + "External id": 515571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840234.513, "dur": 18.201, + "args": { + "External id": 515572,"Record function id": 0, "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840250.065, "dur": 2.244, + "args": { + "External id": 515573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840250.659, "dur": 1.146, + "args": { + "External id": 515574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840251.185, "dur": 0.537, + "args": { + "External id": 515575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840255.945, "dur": 9.577, + "args": { + "External id": 515576,"Record function id": 0, "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840257.632, "dur": 7.435, + "args": { + "External id": 515577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840258.340, "dur": 6.263, + "args": { + "External id": 515578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840260.979, "dur": 3.510, + "args": { + "External id": 515579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840268.583, "dur": 4.326, + "args": { + "External id": 515580,"Record function id": 0, "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840270.077, "dur": 2.439, + "args": { + "External id": 515581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840270.682, "dur": 1.420, + "args": { + "External id": 515582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840271.204, "dur": 0.826, + "args": { + "External id": 515583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840275.872, "dur": 41.821, + "args": { + "External id": 515584,"Record function id": 0, "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840314.744, "dur": 2.466, + "args": { + "External id": 515585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840315.404, "dur": 1.367, + "args": { + "External id": 515586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840315.916, "dur": 0.773, + "args": { + "External id": 515587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840327.624, "dur": 5.324, + "args": { + "External id": 515588,"Record function id": 0, "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145840328.638, "dur": 3.915, + "args": { + "External id": 515589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840329.083, "dur": 2.961, + "args": { + "External id": 515590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145840331.227, "dur": 0.710, + "args": { + "External id": 515591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145840336.997, "dur": 63522.178, + "args": { + "External id": 515592,"Record function id": 0, "Sequence number": 5516276, "Fwd thread id": 1, "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145840338.318, "dur": 63510.058, + "args": { + "External id": 515593,"Sequence number": 5516276, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8712 + } + }, + { + "ph": "f", "id": 411, "pid": 2338711, "tid": 2379440, "ts": 6274145840338.318, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274145840375.438, "dur": 43.742, + "args": { + "External id": 515594,"Record function id": 0, "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274145840427.092, "dur": 70.479, + "args": { + "External id": 515595,"Record function id": 0, "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338711, "tid": 2379440, + "ts": 6274145840504.244, "dur": 63334.536, + "args": { + "External id": 515596,"Record function id": 0, "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145840628.357, "dur": 11.497, + "args": { + "External id": 515597,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145840654.672, "dur": 6.104, + "args": { + "External id": 515598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145840681.662, "dur": 62088.497, + "args": { + "External id": 515599,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145840699.659, "dur": 62055.546, + "args": { + "External id": 515600,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145840806.107, "dur": 22.185, + "args": { + "External id": 515601,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145840852.237, "dur": 61850.733, + "args": { + "External id": 515602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145840855.216, "dur": 61846.541, + "args": { + "External id": 515603,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145840860.592, "dur": 10.531, + "args": { + "External id": 515604,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145840873.409, "dur": 61821.480, + "args": { + "External id": 515605,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145902890.910, "dur": 13.753, + "args": { + "External id": 515606,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145902894.737, "dur": 9.480, + "args": { + "External id": 515607,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145902938.803, "dur": 420.501, + "args": { + "External id": 515608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145902974.681, "dur": 378.741, + "args": { + "External id": 515609,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8728, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145902987.279, "dur": 359.874, + "args": { + "External id": 515610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145903383.975, "dur": 2.744, + "args": { + "External id": 515611,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8730, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903453.579, "dur": 7.706, + "args": { + "External id": 515612,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903474.670, "dur": 41.919, + "args": { + "External id": 515613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903528.238, "dur": 4.257, + "args": { + "External id": 515614,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903537.416, "dur": 13.455, + "args": { + "External id": 515615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903556.729, "dur": 0.885, + "args": { + "External id": 515616,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903562.349, "dur": 12.539, + "args": { + "External id": 515617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903580.170, "dur": 0.989, + "args": { + "External id": 515618,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903585.438, "dur": 36.943, + "args": { + "External id": 515619,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903632.549, "dur": 1.943, + "args": { + "External id": 515620,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903640.838, "dur": 18.099, + "args": { + "External id": 515621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903665.069, "dur": 1.584, + "args": { + "External id": 515622,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903672.874, "dur": 16.965, + "args": { + "External id": 515623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903696.109, "dur": 1.383, + "args": { + "External id": 515624,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903703.185, "dur": 17.411, + "args": { + "External id": 515625,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903729.333, "dur": 0.939, + "args": { + "External id": 515626,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903734.582, "dur": 12.714, + "args": { + "External id": 515627,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145903751.953, "dur": 1.181, + "args": { + "External id": 515628,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145903757.018, "dur": 13.128, + "args": { + "External id": 515629,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145903878.034, "dur": 3170.989, + "args": { + "External id": 515630,"Record function id": 0, "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274145903900.967, "dur": 1213.132, + "args": { + "External id": 515631,"Record function id": 0, "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274145903917.800, "dur": 390.486, + "args": { + "External id": 515632,"Record function id": 0, "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904004.120, "dur": 6.597, + "args": { + "External id": 515633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904014.007, "dur": 0.908, + "args": { + "External id": 515634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904017.350, "dur": 0.944, + "args": { + "External id": 515635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904020.080, "dur": 0.915, + "args": { + "External id": 515636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904030.288, "dur": 0.869, + "args": { + "External id": 515637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904032.955, "dur": 0.850, + "args": { + "External id": 515638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904035.631, "dur": 0.644, + "args": { + "External id": 515639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904037.935, "dur": 2.230, + "args": { + "External id": 515640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904044.310, "dur": 2.747, + "args": { + "External id": 515641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145904048.691, "dur": 0.719, + "args": { + "External id": 515642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145904106.038, "dur": 167.689, + "args": { + "External id": 515643,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145904124.002, "dur": 144.069, + "args": { + "External id": 515644,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145904144.069, "dur": 15.938, + "args": { + "External id": 515645,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145904164.096, "dur": 73.175, + "args": { + "External id": 515646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145904166.882, "dur": 70.112, + "args": { + "External id": 515647,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904171.235, "dur": 6.827, + "args": { + "External id": 515648,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145904182.104, "dur": 54.229, + "args": { + "External id": 515649,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338711, "tid": 2379440, + "ts": 6274145904393.540, "dur": 711.817, + "args": { + "External id": 515650,"Record function id": 0, "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274145904410.848, "dur": 639.338, + "args": { + "External id": 515651,"Record function id": 0, "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145904467.736, "dur": 5.341, + "args": { + "External id": 515652,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145904490.829, "dur": 35.241, + "args": { + "External id": 515653,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904496.472, "dur": 1.817, + "args": { + "External id": 515654,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904500.707, "dur": 0.447, + "args": { + "External id": 515655,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904502.832, "dur": 0.834, + "args": { + "External id": 515656,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904505.595, "dur": 2.862, + "args": { + "External id": 515657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904513.065, "dur": 0.566, + "args": { + "External id": 515658,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904514.992, "dur": 0.571, + "args": { + "External id": 515659,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904517.470, "dur": 0.728, + "args": { + "External id": 515660,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904520.274, "dur": 0.453, + "args": { + "External id": 515661,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904521.989, "dur": 0.585, + "args": { + "External id": 515662,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145904537.172, "dur": 46.622, + "args": { + "External id": 515663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145904649.243, "dur": 147.685, + "args": { + "External id": 515664,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145904663.894, "dur": 6.277, + "args": { + "External id": 515665,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145904677.156, "dur": 18.075, + "args": { + "External id": 515666,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145904683.023, "dur": 11.642, + "args": { + "External id": 515667,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904688.298, "dur": 0.865, + "args": { + "External id": 515668,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145904704.980, "dur": 32.169, + "args": { + "External id": 515669,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904707.857, "dur": 3.045, + "args": { + "External id": 515670,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904713.014, "dur": 0.651, + "args": { + "External id": 515671,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904716.265, "dur": 0.620, + "args": { + "External id": 515672,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904719.229, "dur": 0.478, + "args": { + "External id": 515673,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904721.594, "dur": 0.897, + "args": { + "External id": 515674,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904725.007, "dur": 0.710, + "args": { + "External id": 515675,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904727.913, "dur": 0.315, + "args": { + "External id": 515676,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904729.466, "dur": 0.659, + "args": { + "External id": 515677,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145904731.748, "dur": 2.553, + "args": { + "External id": 515678,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145904749.500, "dur": 39.904, + "args": { + "External id": 515679,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145904846.767, "dur": 126.405, + "args": { + "External id": 515680,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145904875.475, "dur": 93.838, + "args": { + "External id": 515681,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8800, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145904886.149, "dur": 78.577, + "args": { + "External id": 515682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145904993.548, "dur": 1.693, + "args": { + "External id": 515683,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8802, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145905122.968, "dur": 1903.451, + "args": { + "External id": 515684,"Sequence number": 5516275, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8803 + } + }, + { + "ph": "f", "id": 412, "pid": 2338711, "tid": 2379440, "ts": 6274145905122.968, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905241.614, "dur": 119.761, + "args": { + "External id": 515685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145905407.661, "dur": 41.731, + "args": { + "External id": 515686,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905469.964, "dur": 49.755, + "args": { + "External id": 515687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905529.693, "dur": 32.270, + "args": { + "External id": 515688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905568.240, "dur": 60.351, + "args": { + "External id": 515689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905641.450, "dur": 43.184, + "args": { + "External id": 515690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905695.363, "dur": 31.892, + "args": { + "External id": 515691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145905756.909, "dur": 28.666, + "args": { + "External id": 515692,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145905805.422, "dur": 32.003, + "args": { + "External id": 515693,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145905858.714, "dur": 22.857, + "args": { + "External id": 515694,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145905895.727, "dur": 16.344, + "args": { + "External id": 515695,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905922.982, "dur": 44.337, + "args": { + "External id": 515696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145905971.003, "dur": 40.061, + "args": { + "External id": 515697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145906042.753, "dur": 324.338, + "args": { + "External id": 515698,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145906172.717, "dur": 11.408, + "args": { + "External id": 515699,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145906186.973, "dur": 3.491, + "args": { + "External id": 515700,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145906191.792, "dur": 2.342, + "args": { + "External id": 515701,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145906195.239, "dur": 4.383, + "args": { + "External id": 515702,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145906254.978, "dur": 5.368, + "args": { + "External id": 515703,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145906257.113, "dur": 3.012, + "args": { + "External id": 515704,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145906262.360, "dur": 35.038, + "args": { + "External id": 515705,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145906268.428, "dur": 1.834, + "args": { + "External id": 515706,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145906299.056, "dur": 2.166, + "args": { + "External id": 515707,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145906300.392, "dur": 0.738, + "args": { + "External id": 515708,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145906302.811, "dur": 15.730, + "args": { + "External id": 515709,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145906305.240, "dur": 0.909, + "args": { + "External id": 515710,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145906425.793, "dur": 33.673, + "args": { + "External id": 515711,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145906477.020, "dur": 17.808, + "args": { + "External id": 515712,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145906503.859, "dur": 53.569, + "args": { + "External id": 515713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145906565.848, "dur": 70.602, + "args": { + "External id": 515714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145906653.103, "dur": 32.968, + "args": { + "External id": 515715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145906696.649, "dur": 34.946, + "args": { + "External id": 515716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145906740.123, "dur": 30.431, + "args": { + "External id": 515717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145906777.700, "dur": 32.543, + "args": { + "External id": 515718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145906832.290, "dur": 26.881, + "args": { + "External id": 515719,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145906875.631, "dur": 25.308, + "args": { + "External id": 515720,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145906915.051, "dur": 17.279, + "args": { + "External id": 515721,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145906945.678, "dur": 14.445, + "args": { + "External id": 515722,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145906973.016, "dur": 18.032, + "args": { + "External id": 515723,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907111.734, "dur": 33.250, + "args": { + "External id": 515724,"Record function id": 0, "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907115.574, "dur": 28.006, + "args": { + "External id": 515725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907134.799, "dur": 7.305, + "args": { + "External id": 515726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907137.217, "dur": 4.624, + "args": { + "External id": 515727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907149.167, "dur": 6.097, + "args": { + "External id": 515728,"Record function id": 0, "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907150.920, "dur": 3.849, + "args": { + "External id": 515729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907151.697, "dur": 2.591, + "args": { + "External id": 515730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907153.010, "dur": 1.143, + "args": { + "External id": 515731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907158.553, "dur": 4.786, + "args": { + "External id": 515732,"Record function id": 0, "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907159.907, "dur": 3.024, + "args": { + "External id": 515733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907160.636, "dur": 1.867, + "args": { + "External id": 515734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907161.330, "dur": 1.107, + "args": { + "External id": 515735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907166.428, "dur": 4.190, + "args": { + "External id": 515736,"Record function id": 0, "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907167.441, "dur": 2.752, + "args": { + "External id": 515737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907168.031, "dur": 1.775, + "args": { + "External id": 515738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907168.695, "dur": 1.047, + "args": { + "External id": 515739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907173.610, "dur": 3.757, + "args": { + "External id": 515740,"Record function id": 0, "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907174.761, "dur": 2.187, + "args": { + "External id": 515741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907175.268, "dur": 1.219, + "args": { + "External id": 515742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907175.739, "dur": 0.679, + "args": { + "External id": 515743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907180.430, "dur": 6.214, + "args": { + "External id": 515744,"Record function id": 0, "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907181.566, "dur": 4.627, + "args": { + "External id": 515745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907182.086, "dur": 3.350, + "args": { + "External id": 515746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907182.461, "dur": 2.891, + "args": { + "External id": 515747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907189.676, "dur": 4.524, + "args": { + "External id": 515748,"Record function id": 0, "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907190.917, "dur": 2.833, + "args": { + "External id": 515749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907191.662, "dur": 1.645, + "args": { + "External id": 515750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907192.144, "dur": 1.101, + "args": { + "External id": 515751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907197.422, "dur": 6.306, + "args": { + "External id": 515752,"Record function id": 0, "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907198.595, "dur": 4.715, + "args": { + "External id": 515753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907199.240, "dur": 3.594, + "args": { + "External id": 515754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907201.914, "dur": 0.809, + "args": { + "External id": 515755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907207.172, "dur": 3.409, + "args": { + "External id": 515756,"Record function id": 0, "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145907208.136, "dur": 2.026, + "args": { + "External id": 515757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907208.754, "dur": 0.993, + "args": { + "External id": 515758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145907209.036, "dur": 0.645, + "args": { + "External id": 515759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145907214.695, "dur": 65340.093, + "args": { + "External id": 515760,"Record function id": 0, "Sequence number": 5516274, "Fwd thread id": 1, "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145907216.054, "dur": 65329.284, + "args": { + "External id": 515761,"Sequence number": 5516274, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8880 + } + }, + { + "ph": "f", "id": 413, "pid": 2338711, "tid": 2379440, "ts": 6274145907216.054, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274145907251.380, "dur": 43.442, + "args": { + "External id": 515762,"Record function id": 0, "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274145907302.473, "dur": 66.749, + "args": { + "External id": 515763,"Record function id": 0, "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338711, "tid": 2379440, + "ts": 6274145907375.113, "dur": 65161.746, + "args": { + "External id": 515764,"Record function id": 0, "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145907475.331, "dur": 8.525, + "args": { + "External id": 515765,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145907494.615, "dur": 5.174, + "args": { + "External id": 515766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145907514.618, "dur": 63900.580, + "args": { + "External id": 515767,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145907532.910, "dur": 63868.021, + "args": { + "External id": 515768,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145907668.719, "dur": 31.872, + "args": { + "External id": 515769,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145907730.015, "dur": 63619.578, + "args": { + "External id": 515770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145907733.911, "dur": 63614.437, + "args": { + "External id": 515771,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145907746.354, "dur": 13.170, + "args": { + "External id": 515772,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145907762.622, "dur": 63579.065, + "args": { + "External id": 515773,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145971537.270, "dur": 13.619, + "args": { + "External id": 515774,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145971541.224, "dur": 9.246, + "args": { + "External id": 515775,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145971586.313, "dur": 507.378, + "args": { + "External id": 515776,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145971641.017, "dur": 441.574, + "args": { + "External id": 515777,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8896, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145971657.860, "dur": 388.780, + "args": { + "External id": 515778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145972120.834, "dur": 3.700, + "args": { + "External id": 515779,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8898, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972204.439, "dur": 7.961, + "args": { + "External id": 515780,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972226.537, "dur": 42.196, + "args": { + "External id": 515781,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972280.181, "dur": 4.142, + "args": { + "External id": 515782,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972293.272, "dur": 13.824, + "args": { + "External id": 515783,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972313.461, "dur": 0.913, + "args": { + "External id": 515784,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972318.850, "dur": 14.027, + "args": { + "External id": 515785,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972337.767, "dur": 0.775, + "args": { + "External id": 515786,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972342.796, "dur": 15.124, + "args": { + "External id": 515787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972362.601, "dur": 0.966, + "args": { + "External id": 515788,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972367.609, "dur": 13.756, + "args": { + "External id": 515789,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972385.982, "dur": 0.940, + "args": { + "External id": 515790,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972390.627, "dur": 11.847, + "args": { + "External id": 515791,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972407.696, "dur": 0.930, + "args": { + "External id": 515792,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972412.655, "dur": 12.800, + "args": { + "External id": 515793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972432.131, "dur": 0.772, + "args": { + "External id": 515794,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972436.820, "dur": 12.116, + "args": { + "External id": 515795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972453.046, "dur": 0.945, + "args": { + "External id": 515796,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972457.719, "dur": 13.388, + "args": { + "External id": 515797,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145972572.695, "dur": 3229.962, + "args": { + "External id": 515798,"Record function id": 0, "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274145972617.778, "dur": 1186.503, + "args": { + "External id": 515799,"Record function id": 0, "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274145972640.711, "dur": 360.797, + "args": { + "External id": 515800,"Record function id": 0, "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972741.241, "dur": 8.396, + "args": { + "External id": 515801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972753.714, "dur": 0.904, + "args": { + "External id": 515802,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972756.694, "dur": 0.963, + "args": { + "External id": 515803,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972759.637, "dur": 0.815, + "args": { + "External id": 515804,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972761.866, "dur": 1.064, + "args": { + "External id": 515805,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972766.223, "dur": 0.919, + "args": { + "External id": 515806,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972768.650, "dur": 0.730, + "args": { + "External id": 515807,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972770.933, "dur": 2.216, + "args": { + "External id": 515808,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972774.391, "dur": 2.778, + "args": { + "External id": 515809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145972779.978, "dur": 0.809, + "args": { + "External id": 515810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145972798.906, "dur": 166.880, + "args": { + "External id": 515811,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145972817.646, "dur": 143.084, + "args": { + "External id": 515812,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145972839.227, "dur": 15.109, + "args": { + "External id": 515813,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145972858.853, "dur": 72.481, + "args": { + "External id": 515814,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145972862.054, "dur": 68.953, + "args": { + "External id": 515815,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145972866.422, "dur": 6.017, + "args": { + "External id": 515816,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145972874.612, "dur": 55.720, + "args": { + "External id": 515817,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338711, "tid": 2379440, + "ts": 6274145973130.217, "dur": 665.731, + "args": { + "External id": 515818,"Record function id": 0, "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274145973150.081, "dur": 631.961, + "args": { + "External id": 515819,"Record function id": 0, "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145973213.102, "dur": 7.245, + "args": { + "External id": 515820,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145973237.162, "dur": 31.524, + "args": { + "External id": 515821,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973243.283, "dur": 2.031, + "args": { + "External id": 515822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973248.092, "dur": 0.557, + "args": { + "External id": 515823,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973249.962, "dur": 0.581, + "args": { + "External id": 515824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973252.564, "dur": 2.944, + "args": { + "External id": 515825,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973257.293, "dur": 0.385, + "args": { + "External id": 515826,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973258.755, "dur": 0.480, + "args": { + "External id": 515827,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973260.819, "dur": 0.363, + "args": { + "External id": 515828,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973262.667, "dur": 0.543, + "args": { + "External id": 515829,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973264.208, "dur": 0.359, + "args": { + "External id": 515830,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145973280.350, "dur": 48.248, + "args": { + "External id": 515831,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274145973363.529, "dur": 117.547, + "args": { + "External id": 515832,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145973375.414, "dur": 3.399, + "args": { + "External id": 515833,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274145973383.915, "dur": 11.002, + "args": { + "External id": 515834,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274145973389.124, "dur": 5.391, + "args": { + "External id": 515835,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973392.815, "dur": 0.544, + "args": { + "External id": 515836,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274145973401.924, "dur": 25.526, + "args": { + "External id": 515837,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973403.833, "dur": 2.651, + "args": { + "External id": 515838,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973408.152, "dur": 0.412, + "args": { + "External id": 515839,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973410.442, "dur": 0.402, + "args": { + "External id": 515840,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973412.327, "dur": 0.372, + "args": { + "External id": 515841,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973414.556, "dur": 0.460, + "args": { + "External id": 515842,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973416.926, "dur": 0.295, + "args": { + "External id": 515843,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973418.188, "dur": 0.374, + "args": { + "External id": 515844,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973420.626, "dur": 0.492, + "args": { + "External id": 515845,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145973422.564, "dur": 2.347, + "args": { + "External id": 515846,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145973441.147, "dur": 32.713, + "args": { + "External id": 515847,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274145973529.190, "dur": 153.980, + "args": { + "External id": 515848,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145973557.550, "dur": 121.149, + "args": { + "External id": 515849,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8968, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274145973568.043, "dur": 104.977, + "args": { + "External id": 515850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274145973710.480, "dur": 1.927, + "args": { + "External id": 515851,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8970, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145973813.307, "dur": 1966.365, + "args": { + "External id": 515852,"Sequence number": 5516273, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8971 + } + }, + { + "ph": "f", "id": 414, "pid": 2338711, "tid": 2379440, "ts": 6274145973813.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145973935.322, "dur": 159.641, + "args": { + "External id": 515853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145974145.831, "dur": 42.893, + "args": { + "External id": 515854,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274145974211.507, "dur": 58.604, + "args": { + "External id": 515855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145974280.711, "dur": 34.818, + "args": { + "External id": 515856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145974322.102, "dur": 34.338, + "args": { + "External id": 515857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145974363.058, "dur": 30.067, + "args": { + "External id": 515858,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145974402.121, "dur": 30.651, + "args": { + "External id": 515859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145974458.967, "dur": 25.852, + "args": { + "External id": 515860,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274145974504.006, "dur": 32.527, + "args": { + "External id": 515861,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145974556.987, "dur": 20.798, + "args": { + "External id": 515862,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145974613.449, "dur": 27.314, + "args": { + "External id": 515863,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145974658.080, "dur": 48.675, + "args": { + "External id": 515864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145974711.668, "dur": 36.782, + "args": { + "External id": 515865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274145974781.117, "dur": 264.691, + "args": { + "External id": 515866,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145974866.295, "dur": 7.533, + "args": { + "External id": 515867,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145974875.922, "dur": 3.363, + "args": { + "External id": 515868,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145974881.167, "dur": 2.232, + "args": { + "External id": 515869,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145974884.601, "dur": 5.128, + "args": { + "External id": 515870,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145974939.239, "dur": 5.504, + "args": { + "External id": 515871,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145974941.393, "dur": 3.117, + "args": { + "External id": 515872,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145974946.665, "dur": 33.220, + "args": { + "External id": 515873,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145974952.774, "dur": 1.834, + "args": { + "External id": 515874,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274145974981.629, "dur": 2.389, + "args": { + "External id": 515875,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145974983.190, "dur": 0.755, + "args": { + "External id": 515876,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274145974985.470, "dur": 16.147, + "args": { + "External id": 515877,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145974987.937, "dur": 0.883, + "args": { + "External id": 515878,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274145975127.701, "dur": 32.183, + "args": { + "External id": 515879,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145975177.656, "dur": 24.676, + "args": { + "External id": 515880,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145975221.604, "dur": 54.844, + "args": { + "External id": 515881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145975284.908, "dur": 42.234, + "args": { + "External id": 515882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145975340.536, "dur": 20.924, + "args": { + "External id": 515883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145975371.050, "dur": 34.987, + "args": { + "External id": 515884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145975414.221, "dur": 31.099, + "args": { + "External id": 515885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274145975452.729, "dur": 34.296, + "args": { + "External id": 515886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274145975506.367, "dur": 26.161, + "args": { + "External id": 515887,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145975548.508, "dur": 27.040, + "args": { + "External id": 515888,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274145975619.771, "dur": 30.420, + "args": { + "External id": 515889,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274145975674.302, "dur": 23.388, + "args": { + "External id": 515890,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274145975717.670, "dur": 24.076, + "args": { + "External id": 515891,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975827.950, "dur": 18.282, + "args": { + "External id": 515892,"Record function id": 0, "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975831.907, "dur": 13.292, + "args": { + "External id": 515893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975836.826, "dur": 7.303, + "args": { + "External id": 515894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975839.306, "dur": 4.704, + "args": { + "External id": 515895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975850.177, "dur": 6.242, + "args": { + "External id": 515896,"Record function id": 0, "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975851.444, "dur": 4.566, + "args": { + "External id": 515897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975852.184, "dur": 3.267, + "args": { + "External id": 515898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975853.750, "dur": 1.566, + "args": { + "External id": 515899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975859.717, "dur": 4.541, + "args": { + "External id": 515900,"Record function id": 0, "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975860.937, "dur": 2.893, + "args": { + "External id": 515901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975861.582, "dur": 1.814, + "args": { + "External id": 515902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975862.180, "dur": 1.154, + "args": { + "External id": 515903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975867.393, "dur": 3.816, + "args": { + "External id": 515904,"Record function id": 0, "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975868.586, "dur": 2.226, + "args": { + "External id": 515905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975869.180, "dur": 1.233, + "args": { + "External id": 515906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975869.549, "dur": 0.800, + "args": { + "External id": 515907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975874.340, "dur": 3.923, + "args": { + "External id": 515908,"Record function id": 0, "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975875.347, "dur": 2.462, + "args": { + "External id": 515909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975875.997, "dur": 1.310, + "args": { + "External id": 515910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975876.533, "dur": 0.708, + "args": { + "External id": 515911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975881.351, "dur": 6.184, + "args": { + "External id": 515912,"Record function id": 0, "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975882.393, "dur": 4.700, + "args": { + "External id": 515913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975882.862, "dur": 3.840, + "args": { + "External id": 515914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975883.370, "dur": 3.239, + "args": { + "External id": 515915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975890.683, "dur": 3.448, + "args": { + "External id": 515916,"Record function id": 0, "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975891.722, "dur": 1.969, + "args": { + "External id": 515917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975892.154, "dur": 0.997, + "args": { + "External id": 515918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975892.422, "dur": 0.655, + "args": { + "External id": 515919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975897.560, "dur": 3.876, + "args": { + "External id": 515920,"Record function id": 0, "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975898.621, "dur": 2.419, + "args": { + "External id": 515921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975899.204, "dur": 1.407, + "args": { + "External id": 515922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975899.916, "dur": 0.601, + "args": { + "External id": 515923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975904.901, "dur": 4.137, + "args": { + "External id": 515924,"Record function id": 0, "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274145975906.003, "dur": 2.615, + "args": { + "External id": 515925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975906.451, "dur": 1.718, + "args": { + "External id": 515926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274145975907.150, "dur": 0.894, + "args": { + "External id": 515927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145975913.201, "dur": 63396.227, + "args": { + "External id": 515928,"Record function id": 0, "Sequence number": 5516272, "Fwd thread id": 1, "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274145975914.398, "dur": 63384.489, + "args": { + "External id": 515929,"Sequence number": 5516272, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9048 + } + }, + { + "ph": "f", "id": 415, "pid": 2338711, "tid": 2379440, "ts": 6274145975914.398, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274145975950.700, "dur": 44.307, + "args": { + "External id": 515930,"Record function id": 0, "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274145976003.485, "dur": 110.781, + "args": { + "External id": 515931,"Record function id": 0, "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338711, "tid": 2379440, + "ts": 6274145976123.509, "dur": 63166.145, + "args": { + "External id": 515932,"Record function id": 0, "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145976225.527, "dur": 8.695, + "args": { + "External id": 515933,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274145976246.229, "dur": 5.516, + "args": { + "External id": 515934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145976267.386, "dur": 61951.562, + "args": { + "External id": 515935,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274145976284.154, "dur": 61921.152, + "args": { + "External id": 515936,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274145976384.525, "dur": 19.803, + "args": { + "External id": 515937,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274145976425.171, "dur": 61726.683, + "args": { + "External id": 515938,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274145976428.409, "dur": 61721.965, + "args": { + "External id": 515939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274145976434.044, "dur": 10.487, + "args": { + "External id": 515940,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274145976447.116, "dur": 61696.087, + "args": { + "External id": 515941,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146038341.727, "dur": 15.630, + "args": { + "External id": 515942,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146038345.951, "dur": 10.975, + "args": { + "External id": 515943,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146038391.327, "dur": 430.342, + "args": { + "External id": 515944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146038422.834, "dur": 392.538, + "args": { + "External id": 515945,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9064, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146038435.097, "dur": 372.918, + "args": { + "External id": 515946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146038852.542, "dur": 2.525, + "args": { + "External id": 515947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9066, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146038924.575, "dur": 8.046, + "args": { + "External id": 515948,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146038946.392, "dur": 38.441, + "args": { + "External id": 515949,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146038996.862, "dur": 3.723, + "args": { + "External id": 515950,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039006.547, "dur": 12.703, + "args": { + "External id": 515951,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039025.815, "dur": 0.925, + "args": { + "External id": 515952,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039031.750, "dur": 13.478, + "args": { + "External id": 515953,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039050.653, "dur": 0.910, + "args": { + "External id": 515954,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039093.243, "dur": 16.306, + "args": { + "External id": 515955,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039116.302, "dur": 1.326, + "args": { + "External id": 515956,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039122.732, "dur": 12.721, + "args": { + "External id": 515957,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039139.944, "dur": 1.231, + "args": { + "External id": 515958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039145.346, "dur": 11.962, + "args": { + "External id": 515959,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039161.887, "dur": 0.989, + "args": { + "External id": 515960,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039166.846, "dur": 12.318, + "args": { + "External id": 515961,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039183.192, "dur": 0.714, + "args": { + "External id": 515962,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039188.346, "dur": 11.415, + "args": { + "External id": 515963,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039206.414, "dur": 0.929, + "args": { + "External id": 515964,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039211.228, "dur": 10.967, + "args": { + "External id": 515965,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146039327.275, "dur": 3235.402, + "args": { + "External id": 515966,"Record function id": 0, "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274146039350.273, "dur": 1199.566, + "args": { + "External id": 515967,"Record function id": 0, "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274146039367.064, "dur": 403.622, + "args": { + "External id": 515968,"Record function id": 0, "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039452.437, "dur": 7.043, + "args": { + "External id": 515969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039462.944, "dur": 0.947, + "args": { + "External id": 515970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039466.080, "dur": 0.930, + "args": { + "External id": 515971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039468.800, "dur": 1.054, + "args": { + "External id": 515972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039471.397, "dur": 0.939, + "args": { + "External id": 515973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039474.047, "dur": 1.054, + "args": { + "External id": 515974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039478.577, "dur": 0.823, + "args": { + "External id": 515975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039480.710, "dur": 2.291, + "args": { + "External id": 515976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039484.731, "dur": 2.993, + "args": { + "External id": 515977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146039488.880, "dur": 0.660, + "args": { + "External id": 515978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146039511.813, "dur": 215.574, + "args": { + "External id": 515979,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146039530.337, "dur": 190.074, + "args": { + "External id": 515980,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146039552.357, "dur": 15.448, + "args": { + "External id": 515981,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146039572.157, "dur": 107.592, + "args": { + "External id": 515982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146039575.273, "dur": 103.965, + "args": { + "External id": 515983,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039579.976, "dur": 5.772, + "args": { + "External id": 515984,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146039605.864, "dur": 68.036, + "args": { + "External id": 515985,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338711, "tid": 2379440, + "ts": 6274146039867.293, "dur": 674.295, + "args": { + "External id": 515986,"Record function id": 0, "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274146039887.648, "dur": 639.847, + "args": { + "External id": 515987,"Record function id": 0, "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146039951.390, "dur": 7.381, + "args": { + "External id": 515988,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146039975.516, "dur": 31.993, + "args": { + "External id": 515989,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039981.391, "dur": 1.697, + "args": { + "External id": 515990,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039985.137, "dur": 0.543, + "args": { + "External id": 515991,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039987.333, "dur": 0.512, + "args": { + "External id": 515992,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039989.651, "dur": 2.525, + "args": { + "External id": 515993,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039993.486, "dur": 0.597, + "args": { + "External id": 515994,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039996.107, "dur": 0.524, + "args": { + "External id": 515995,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039998.151, "dur": 0.346, + "args": { + "External id": 515996,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146039999.615, "dur": 0.469, + "args": { + "External id": 515997,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040002.351, "dur": 0.689, + "args": { + "External id": 515998,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146040018.718, "dur": 79.415, + "args": { + "External id": 515999,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146040136.379, "dur": 137.192, + "args": { + "External id": 516000,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146040149.118, "dur": 4.699, + "args": { + "External id": 516001,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146040159.689, "dur": 12.080, + "args": { + "External id": 516002,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146040165.089, "dur": 6.243, + "args": { + "External id": 516003,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040169.187, "dur": 0.644, + "args": { + "External id": 516004,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146040179.178, "dur": 42.950, + "args": { + "External id": 516005,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040196.398, "dur": 2.969, + "args": { + "External id": 516006,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040201.277, "dur": 0.463, + "args": { + "External id": 516007,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040203.371, "dur": 0.648, + "args": { + "External id": 516008,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040205.664, "dur": 0.448, + "args": { + "External id": 516009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040208.507, "dur": 0.422, + "args": { + "External id": 516010,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040210.893, "dur": 0.366, + "args": { + "External id": 516011,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040212.156, "dur": 0.804, + "args": { + "External id": 516012,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040215.086, "dur": 0.313, + "args": { + "External id": 516013,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146040216.716, "dur": 2.686, + "args": { + "External id": 516014,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146040233.993, "dur": 31.988, + "args": { + "External id": 516015,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146040323.669, "dur": 128.252, + "args": { + "External id": 516016,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146040353.380, "dur": 94.757, + "args": { + "External id": 516017,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9136, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146040364.315, "dur": 79.194, + "args": { + "External id": 516018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146040471.275, "dur": 2.134, + "args": { + "External id": 516019,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9138, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146040561.165, "dur": 1979.334, + "args": { + "External id": 516020,"Sequence number": 5516271, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9139 + } + }, + { + "ph": "f", "id": 416, "pid": 2338711, "tid": 2379440, "ts": 6274146040561.165, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146040723.564, "dur": 122.526, + "args": { + "External id": 516021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146040896.397, "dur": 40.871, + "args": { + "External id": 516022,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146040957.828, "dur": 51.542, + "args": { + "External id": 516023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146041019.429, "dur": 31.990, + "args": { + "External id": 516024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146041102.926, "dur": 39.644, + "args": { + "External id": 516025,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146041150.001, "dur": 30.153, + "args": { + "External id": 516026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146041189.151, "dur": 30.577, + "args": { + "External id": 516027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146041249.054, "dur": 26.714, + "args": { + "External id": 516028,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146041300.310, "dur": 32.582, + "args": { + "External id": 516029,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146041358.326, "dur": 21.986, + "args": { + "External id": 516030,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146041394.564, "dur": 17.719, + "args": { + "External id": 516031,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146041423.733, "dur": 41.464, + "args": { + "External id": 516032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146041468.815, "dur": 35.115, + "args": { + "External id": 516033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146041539.726, "dur": 329.360, + "args": { + "External id": 516034,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146041656.285, "dur": 9.615, + "args": { + "External id": 516035,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146041668.965, "dur": 4.094, + "args": { + "External id": 516036,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146041675.175, "dur": 3.017, + "args": { + "External id": 516037,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146041680.036, "dur": 5.523, + "args": { + "External id": 516038,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146041740.050, "dur": 5.716, + "args": { + "External id": 516039,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146041742.382, "dur": 3.177, + "args": { + "External id": 516040,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146041748.095, "dur": 34.262, + "args": { + "External id": 516041,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146041754.243, "dur": 1.986, + "args": { + "External id": 516042,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146041784.573, "dur": 1.929, + "args": { + "External id": 516043,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146041785.699, "dur": 0.702, + "args": { + "External id": 516044,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146041787.805, "dur": 16.302, + "args": { + "External id": 516045,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146041790.478, "dur": 0.736, + "args": { + "External id": 516046,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146041912.515, "dur": 36.730, + "args": { + "External id": 516047,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146041967.569, "dur": 17.458, + "args": { + "External id": 516048,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146041997.736, "dur": 54.658, + "args": { + "External id": 516049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146042098.705, "dur": 49.176, + "args": { + "External id": 516050,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146042159.400, "dur": 21.887, + "args": { + "External id": 516051,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146042190.722, "dur": 33.390, + "args": { + "External id": 516052,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146042233.291, "dur": 29.648, + "args": { + "External id": 516053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146042281.590, "dur": 35.720, + "args": { + "External id": 516054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146042338.650, "dur": 27.781, + "args": { + "External id": 516055,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146042384.640, "dur": 25.349, + "args": { + "External id": 516056,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146042425.129, "dur": 18.058, + "args": { + "External id": 516057,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146042458.975, "dur": 14.097, + "args": { + "External id": 516058,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146042487.069, "dur": 17.344, + "args": { + "External id": 516059,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042605.262, "dur": 23.105, + "args": { + "External id": 516060,"Record function id": 0, "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042610.639, "dur": 16.033, + "args": { + "External id": 516061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042616.278, "dur": 8.282, + "args": { + "External id": 516062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042618.592, "dur": 5.529, + "args": { + "External id": 516063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042635.796, "dur": 7.804, + "args": { + "External id": 516064,"Record function id": 0, "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042637.735, "dur": 5.189, + "args": { + "External id": 516065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042638.928, "dur": 3.298, + "args": { + "External id": 516066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042640.486, "dur": 1.557, + "args": { + "External id": 516067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042648.606, "dur": 6.826, + "args": { + "External id": 516068,"Record function id": 0, "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042650.847, "dur": 3.925, + "args": { + "External id": 516069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042651.850, "dur": 2.332, + "args": { + "External id": 516070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042652.757, "dur": 1.255, + "args": { + "External id": 516071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042660.366, "dur": 6.381, + "args": { + "External id": 516072,"Record function id": 0, "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042662.372, "dur": 3.720, + "args": { + "External id": 516073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042663.534, "dur": 1.971, + "args": { + "External id": 516074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042664.272, "dur": 1.070, + "args": { + "External id": 516075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042671.305, "dur": 6.805, + "args": { + "External id": 516076,"Record function id": 0, "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042673.552, "dur": 3.815, + "args": { + "External id": 516077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042674.627, "dur": 1.966, + "args": { + "External id": 516078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042675.499, "dur": 0.939, + "args": { + "External id": 516079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042682.753, "dur": 9.629, + "args": { + "External id": 516080,"Record function id": 0, "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042684.699, "dur": 6.571, + "args": { + "External id": 516081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042685.599, "dur": 5.080, + "args": { + "External id": 516082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042686.580, "dur": 3.888, + "args": { + "External id": 516083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042696.004, "dur": 4.355, + "args": { + "External id": 516084,"Record function id": 0, "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042697.386, "dur": 2.513, + "args": { + "External id": 516085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042697.974, "dur": 1.441, + "args": { + "External id": 516086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042698.680, "dur": 0.655, + "args": { + "External id": 516087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042703.892, "dur": 4.224, + "args": { + "External id": 516088,"Record function id": 0, "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042704.906, "dur": 2.774, + "args": { + "External id": 516089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042705.559, "dur": 1.371, + "args": { + "External id": 516090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042706.236, "dur": 0.585, + "args": { + "External id": 516091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042711.449, "dur": 4.425, + "args": { + "External id": 516092,"Record function id": 0, "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146042712.689, "dur": 2.798, + "args": { + "External id": 516093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042713.403, "dur": 1.627, + "args": { + "External id": 516094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146042714.325, "dur": 0.564, + "args": { + "External id": 516095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146042720.034, "dur": 64135.486, + "args": { + "External id": 516096,"Record function id": 0, "Sequence number": 5516270, "Fwd thread id": 1, "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146042750.273, "dur": 64093.614, + "args": { + "External id": 516097,"Sequence number": 5516270, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9216 + } + }, + { + "ph": "f", "id": 417, "pid": 2338711, "tid": 2379440, "ts": 6274146042750.273, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274146042788.610, "dur": 46.323, + "args": { + "External id": 516098,"Record function id": 0, "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274146042843.447, "dur": 69.189, + "args": { + "External id": 516099,"Record function id": 0, "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338711, "tid": 2379440, + "ts": 6274146042918.670, "dur": 63915.922, + "args": { + "External id": 516100,"Record function id": 0, "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146043021.833, "dur": 8.519, + "args": { + "External id": 516101,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146043041.375, "dur": 4.935, + "args": { + "External id": 516102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146043099.883, "dur": 62779.296, + "args": { + "External id": 516103,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146043116.525, "dur": 62747.836, + "args": { + "External id": 516104,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146043216.444, "dur": 19.964, + "args": { + "External id": 516105,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146043259.835, "dur": 62554.374, + "args": { + "External id": 516106,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146043263.031, "dur": 62549.788, + "args": { + "External id": 516107,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146043269.008, "dur": 11.779, + "args": { + "External id": 516108,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146043283.102, "dur": 62523.063, + "args": { + "External id": 516109,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146106001.043, "dur": 14.002, + "args": { + "External id": 516110,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146106005.176, "dur": 9.399, + "args": { + "External id": 516111,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106049.634, "dur": 347.065, + "args": { + "External id": 516112,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146106114.763, "dur": 276.818, + "args": { + "External id": 516113,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9232, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146106128.107, "dur": 257.503, + "args": { + "External id": 516114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146106418.966, "dur": 2.309, + "args": { + "External id": 516115,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9234, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106483.753, "dur": 7.498, + "args": { + "External id": 516116,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106505.228, "dur": 33.103, + "args": { + "External id": 516117,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106549.618, "dur": 3.970, + "args": { + "External id": 516118,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106558.852, "dur": 12.242, + "args": { + "External id": 516119,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106577.122, "dur": 1.224, + "args": { + "External id": 516120,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106583.118, "dur": 34.060, + "args": { + "External id": 516121,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106628.644, "dur": 1.912, + "args": { + "External id": 516122,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106637.118, "dur": 15.900, + "args": { + "External id": 516123,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106659.811, "dur": 1.361, + "args": { + "External id": 516124,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106667.257, "dur": 14.540, + "args": { + "External id": 516125,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106688.579, "dur": 1.427, + "args": { + "External id": 516126,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106694.929, "dur": 10.895, + "args": { + "External id": 516127,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106711.051, "dur": 1.236, + "args": { + "External id": 516128,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106716.344, "dur": 11.124, + "args": { + "External id": 516129,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106731.690, "dur": 0.915, + "args": { + "External id": 516130,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106737.101, "dur": 9.509, + "args": { + "External id": 516131,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146106751.064, "dur": 1.008, + "args": { + "External id": 516132,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146106756.318, "dur": 10.436, + "args": { + "External id": 516133,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146106873.806, "dur": 3271.230, + "args": { + "External id": 516134,"Record function id": 0, "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274146106896.981, "dur": 1235.381, + "args": { + "External id": 516135,"Record function id": 0, "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274146106916.302, "dur": 400.315, + "args": { + "External id": 516136,"Record function id": 0, "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107009.908, "dur": 7.680, + "args": { + "External id": 516137,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107021.152, "dur": 1.190, + "args": { + "External id": 516138,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107024.726, "dur": 1.507, + "args": { + "External id": 516139,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107028.079, "dur": 0.883, + "args": { + "External id": 516140,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107030.753, "dur": 0.790, + "args": { + "External id": 516141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107032.852, "dur": 0.836, + "args": { + "External id": 516142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107035.142, "dur": 0.948, + "args": { + "External id": 516143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107037.721, "dur": 2.309, + "args": { + "External id": 516144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107043.350, "dur": 3.306, + "args": { + "External id": 516145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146107047.992, "dur": 0.690, + "args": { + "External id": 516146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146107104.722, "dur": 173.380, + "args": { + "External id": 516147,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146107124.523, "dur": 148.068, + "args": { + "External id": 516148,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146107146.337, "dur": 19.025, + "args": { + "External id": 516149,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146107169.697, "dur": 74.598, + "args": { + "External id": 516150,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146107172.868, "dur": 71.049, + "args": { + "External id": 516151,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107177.495, "dur": 6.627, + "args": { + "External id": 516152,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146107186.367, "dur": 56.810, + "args": { + "External id": 516153,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338711, "tid": 2379440, + "ts": 6274146107409.494, "dur": 714.295, + "args": { + "External id": 516154,"Record function id": 0, "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274146107428.800, "dur": 680.111, + "args": { + "External id": 516155,"Record function id": 0, "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146107491.171, "dur": 5.884, + "args": { + "External id": 516156,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146107513.307, "dur": 30.294, + "args": { + "External id": 516157,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107519.039, "dur": 1.957, + "args": { + "External id": 516158,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107522.887, "dur": 0.872, + "args": { + "External id": 516159,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107525.459, "dur": 0.440, + "args": { + "External id": 516160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107527.519, "dur": 2.563, + "args": { + "External id": 516161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107531.173, "dur": 0.674, + "args": { + "External id": 516162,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107533.655, "dur": 0.477, + "args": { + "External id": 516163,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107535.481, "dur": 0.382, + "args": { + "External id": 516164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107536.932, "dur": 0.583, + "args": { + "External id": 516165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107539.456, "dur": 0.470, + "args": { + "External id": 516166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146107554.250, "dur": 63.958, + "args": { + "External id": 516167,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146107663.618, "dur": 145.669, + "args": { + "External id": 516168,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146107678.493, "dur": 5.925, + "args": { + "External id": 516169,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146107691.279, "dur": 14.639, + "args": { + "External id": 516170,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146107696.744, "dur": 8.594, + "args": { + "External id": 516171,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107701.920, "dur": 1.102, + "args": { + "External id": 516172,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146107715.326, "dur": 34.942, + "args": { + "External id": 516173,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107718.166, "dur": 3.102, + "args": { + "External id": 516174,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107723.833, "dur": 0.788, + "args": { + "External id": 516175,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107726.888, "dur": 0.526, + "args": { + "External id": 516176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107729.954, "dur": 0.540, + "args": { + "External id": 516177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107732.624, "dur": 0.965, + "args": { + "External id": 516178,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107735.947, "dur": 0.837, + "args": { + "External id": 516179,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107738.909, "dur": 0.594, + "args": { + "External id": 516180,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107741.145, "dur": 0.526, + "args": { + "External id": 516181,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146107744.115, "dur": 2.955, + "args": { + "External id": 516182,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146107762.538, "dur": 38.762, + "args": { + "External id": 516183,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146107866.388, "dur": 123.808, + "args": { + "External id": 516184,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146107893.033, "dur": 93.486, + "args": { + "External id": 516185,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9304, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146107903.959, "dur": 78.103, + "args": { + "External id": 516186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146108010.568, "dur": 2.059, + "args": { + "External id": 516187,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9306, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146108142.584, "dur": 1978.510, + "args": { + "External id": 516188,"Sequence number": 5516269, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9307 + } + }, + { + "ph": "f", "id": 418, "pid": 2338711, "tid": 2379440, "ts": 6274146108142.584, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108262.592, "dur": 116.291, + "args": { + "External id": 516189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146108424.833, "dur": 39.491, + "args": { + "External id": 516190,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108485.018, "dur": 50.685, + "args": { + "External id": 516191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108546.269, "dur": 31.810, + "args": { + "External id": 516192,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108584.690, "dur": 66.684, + "args": { + "External id": 516193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108664.799, "dur": 41.101, + "args": { + "External id": 516194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108716.519, "dur": 31.831, + "args": { + "External id": 516195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146108780.994, "dur": 33.051, + "args": { + "External id": 516196,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146108834.405, "dur": 35.150, + "args": { + "External id": 516197,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146108890.394, "dur": 21.011, + "args": { + "External id": 516198,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146108926.181, "dur": 17.829, + "args": { + "External id": 516199,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108955.121, "dur": 38.456, + "args": { + "External id": 516200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146108997.205, "dur": 35.575, + "args": { + "External id": 516201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146109104.238, "dur": 286.349, + "args": { + "External id": 516202,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146109191.815, "dur": 11.583, + "args": { + "External id": 516203,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146109206.201, "dur": 2.851, + "args": { + "External id": 516204,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146109210.682, "dur": 2.492, + "args": { + "External id": 516205,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146109214.446, "dur": 4.367, + "args": { + "External id": 516206,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146109280.717, "dur": 5.903, + "args": { + "External id": 516207,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146109283.031, "dur": 3.354, + "args": { + "External id": 516208,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146109288.755, "dur": 33.782, + "args": { + "External id": 516209,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146109294.957, "dur": 1.693, + "args": { + "External id": 516210,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146109324.097, "dur": 2.345, + "args": { + "External id": 516211,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146109325.551, "dur": 0.776, + "args": { + "External id": 516212,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146109327.953, "dur": 15.138, + "args": { + "External id": 516213,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146109330.516, "dur": 0.716, + "args": { + "External id": 516214,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146109424.767, "dur": 34.274, + "args": { + "External id": 516215,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146109476.535, "dur": 35.400, + "args": { + "External id": 516216,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146109527.755, "dur": 49.910, + "args": { + "External id": 516217,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146109585.284, "dur": 80.597, + "args": { + "External id": 516218,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146109693.322, "dur": 25.435, + "args": { + "External id": 516219,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146109729.584, "dur": 34.619, + "args": { + "External id": 516220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146109772.560, "dur": 29.614, + "args": { + "External id": 516221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146109809.602, "dur": 31.291, + "args": { + "External id": 516222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146109866.202, "dur": 27.818, + "args": { + "External id": 516223,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146109913.510, "dur": 27.305, + "args": { + "External id": 516224,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146109957.700, "dur": 20.202, + "args": { + "External id": 516225,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146109994.027, "dur": 16.272, + "args": { + "External id": 516226,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146110027.374, "dur": 19.584, + "args": { + "External id": 516227,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110170.525, "dur": 17.931, + "args": { + "External id": 516228,"Record function id": 0, "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110174.227, "dur": 13.313, + "args": { + "External id": 516229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110179.111, "dur": 7.409, + "args": { + "External id": 516230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110181.402, "dur": 4.995, + "args": { + "External id": 516231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110192.331, "dur": 5.301, + "args": { + "External id": 516232,"Record function id": 0, "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110193.558, "dur": 3.622, + "args": { + "External id": 516233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110194.556, "dur": 2.148, + "args": { + "External id": 516234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110195.439, "dur": 1.164, + "args": { + "External id": 516235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110200.987, "dur": 4.820, + "args": { + "External id": 516236,"Record function id": 0, "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110202.685, "dur": 2.711, + "args": { + "External id": 516237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110203.340, "dur": 1.626, + "args": { + "External id": 516238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110203.970, "dur": 0.929, + "args": { + "External id": 516239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110209.014, "dur": 6.117, + "args": { + "External id": 516240,"Record function id": 0, "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110209.947, "dur": 4.762, + "args": { + "External id": 516241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110210.452, "dur": 3.842, + "args": { + "External id": 516242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110210.740, "dur": 3.480, + "args": { + "External id": 516243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110218.423, "dur": 4.311, + "args": { + "External id": 516244,"Record function id": 0, "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110219.782, "dur": 2.488, + "args": { + "External id": 516245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110220.276, "dur": 1.500, + "args": { + "External id": 516246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110220.800, "dur": 0.905, + "args": { + "External id": 516247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110225.904, "dur": 4.659, + "args": { + "External id": 516248,"Record function id": 0, "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110227.465, "dur": 2.668, + "args": { + "External id": 516249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110228.180, "dur": 1.465, + "args": { + "External id": 516250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110228.731, "dur": 0.806, + "args": { + "External id": 516251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110233.673, "dur": 4.312, + "args": { + "External id": 516252,"Record function id": 0, "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110235.127, "dur": 2.416, + "args": { + "External id": 516253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110235.797, "dur": 1.324, + "args": { + "External id": 516254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110236.239, "dur": 0.804, + "args": { + "External id": 516255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110240.984, "dur": 4.303, + "args": { + "External id": 516256,"Record function id": 0, "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110242.242, "dur": 2.632, + "args": { + "External id": 516257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110242.874, "dur": 1.540, + "args": { + "External id": 516258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110243.376, "dur": 0.909, + "args": { + "External id": 516259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110248.548, "dur": 3.725, + "args": { + "External id": 516260,"Record function id": 0, "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146110249.528, "dur": 2.323, + "args": { + "External id": 516261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110250.097, "dur": 1.320, + "args": { + "External id": 516262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146110250.571, "dur": 0.717, + "args": { + "External id": 516263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146110256.171, "dur": 66691.972, + "args": { + "External id": 516264,"Record function id": 0, "Sequence number": 5516268, "Fwd thread id": 1, "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146110257.606, "dur": 66680.329, + "args": { + "External id": 516265,"Sequence number": 5516268, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9384 + } + }, + { + "ph": "f", "id": 419, "pid": 2338711, "tid": 2379440, "ts": 6274146110257.606, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274146110292.091, "dur": 45.424, + "args": { + "External id": 516266,"Record function id": 0, "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274146110345.656, "dur": 69.575, + "args": { + "External id": 516267,"Record function id": 0, "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338711, "tid": 2379440, + "ts": 6274146110425.052, "dur": 66503.803, + "args": { + "External id": 516268,"Record function id": 0, "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146110528.270, "dur": 8.460, + "args": { + "External id": 516269,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146110546.543, "dur": 4.809, + "args": { + "External id": 516270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146110567.074, "dur": 65267.363, + "args": { + "External id": 516271,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146110582.041, "dur": 65236.923, + "args": { + "External id": 516272,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146110714.531, "dur": 23.026, + "args": { + "External id": 516273,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146110758.990, "dur": 65007.891, + "args": { + "External id": 516274,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146110762.195, "dur": 65003.436, + "args": { + "External id": 516275,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146110767.380, "dur": 14.272, + "args": { + "External id": 516276,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146110783.704, "dur": 64974.932, + "args": { + "External id": 516277,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146175958.540, "dur": 13.030, + "args": { + "External id": 516278,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146175962.479, "dur": 8.697, + "args": { + "External id": 516279,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176007.095, "dur": 471.627, + "args": { + "External id": 516280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146176043.038, "dur": 429.688, + "args": { + "External id": 516281,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9400, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146176088.407, "dur": 378.770, + "args": { + "External id": 516282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146176506.423, "dur": 2.721, + "args": { + "External id": 516283,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9402, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176580.685, "dur": 24.477, + "args": { + "External id": 516284,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176626.451, "dur": 50.052, + "args": { + "External id": 516285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176691.435, "dur": 2.837, + "args": { + "External id": 516286,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176701.824, "dur": 13.524, + "args": { + "External id": 516287,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176722.199, "dur": 1.335, + "args": { + "External id": 516288,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176728.391, "dur": 12.267, + "args": { + "External id": 516289,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176745.646, "dur": 1.177, + "args": { + "External id": 516290,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176750.754, "dur": 10.136, + "args": { + "External id": 516291,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176765.360, "dur": 0.979, + "args": { + "External id": 516292,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176770.111, "dur": 10.405, + "args": { + "External id": 516293,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176784.827, "dur": 1.376, + "args": { + "External id": 516294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176790.096, "dur": 10.360, + "args": { + "External id": 516295,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176806.712, "dur": 3.275, + "args": { + "External id": 516296,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176813.828, "dur": 10.153, + "args": { + "External id": 516297,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176828.212, "dur": 0.738, + "args": { + "External id": 516298,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176832.710, "dur": 10.425, + "args": { + "External id": 516299,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146176847.289, "dur": 0.750, + "args": { + "External id": 516300,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146176851.924, "dur": 9.700, + "args": { + "External id": 516301,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146176966.546, "dur": 3282.332, + "args": { + "External id": 516302,"Record function id": 0, "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274146176991.409, "dur": 1223.726, + "args": { + "External id": 516303,"Record function id": 0, "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274146177008.999, "dur": 393.561, + "args": { + "External id": 516304,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177138.399, "dur": 5.585, + "args": { + "External id": 516305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177148.059, "dur": 0.856, + "args": { + "External id": 516306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177150.916, "dur": 0.841, + "args": { + "External id": 516307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177153.427, "dur": 0.887, + "args": { + "External id": 516308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177156.264, "dur": 0.769, + "args": { + "External id": 516309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177159.017, "dur": 2.818, + "args": { + "External id": 516310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177163.361, "dur": 0.791, + "args": { + "External id": 516311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177167.511, "dur": 2.059, + "args": { + "External id": 516312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177171.205, "dur": 0.572, + "args": { + "External id": 516313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146177172.942, "dur": 0.799, + "args": { + "External id": 516314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146177194.383, "dur": 173.178, + "args": { + "External id": 516315,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146177212.855, "dur": 149.185, + "args": { + "External id": 516316,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146177234.484, "dur": 15.315, + "args": { + "External id": 516317,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146177253.958, "dur": 75.701, + "args": { + "External id": 516318,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146177257.119, "dur": 72.138, + "args": { + "External id": 516319,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177261.765, "dur": 5.741, + "args": { + "External id": 516320,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146177272.891, "dur": 55.490, + "args": { + "External id": 516321,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338711, "tid": 2379440, + "ts": 6274146177490.718, "dur": 714.434, + "args": { + "External id": 516322,"Record function id": 0, "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274146177510.367, "dur": 680.879, + "args": { + "External id": 516323,"Record function id": 0, "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146177571.982, "dur": 5.584, + "args": { + "External id": 516324,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146177616.335, "dur": 43.926, + "args": { + "External id": 516325,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177623.914, "dur": 4.540, + "args": { + "External id": 516326,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177631.555, "dur": 0.726, + "args": { + "External id": 516327,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177634.693, "dur": 0.657, + "args": { + "External id": 516328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177637.784, "dur": 0.487, + "args": { + "External id": 516329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177640.172, "dur": 0.806, + "args": { + "External id": 516330,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177643.508, "dur": 0.762, + "args": { + "External id": 516331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177646.530, "dur": 0.589, + "args": { + "External id": 516332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177649.037, "dur": 0.476, + "args": { + "External id": 516333,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177651.914, "dur": 2.787, + "args": { + "External id": 516334,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146177674.176, "dur": 48.369, + "args": { + "External id": 516335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146177757.782, "dur": 121.011, + "args": { + "External id": 516336,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146177770.219, "dur": 4.819, + "args": { + "External id": 516337,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146177780.568, "dur": 11.676, + "args": { + "External id": 516338,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146177785.729, "dur": 6.108, + "args": { + "External id": 516339,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177789.662, "dur": 0.929, + "args": { + "External id": 516340,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146177799.475, "dur": 30.976, + "args": { + "External id": 516341,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177801.414, "dur": 0.621, + "args": { + "External id": 516342,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177803.039, "dur": 0.563, + "args": { + "External id": 516343,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177805.453, "dur": 0.446, + "args": { + "External id": 516344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177808.000, "dur": 0.425, + "args": { + "External id": 516345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177809.557, "dur": 0.663, + "args": { + "External id": 516346,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177818.484, "dur": 2.983, + "args": { + "External id": 516347,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177823.273, "dur": 0.487, + "args": { + "External id": 516348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177825.506, "dur": 0.344, + "args": { + "External id": 516349,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146177827.089, "dur": 0.648, + "args": { + "External id": 516350,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146177840.883, "dur": 29.896, + "args": { + "External id": 516351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146177930.313, "dur": 173.071, + "args": { + "External id": 516352,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146177960.408, "dur": 138.465, + "args": { + "External id": 516353,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9472, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146177970.833, "dur": 80.960, + "args": { + "External id": 516354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146178126.454, "dur": 2.325, + "args": { + "External id": 516355,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9474, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146178224.192, "dur": 2000.323, + "args": { + "External id": 516356,"Sequence number": 5516267, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9475 + } + }, + { + "ph": "f", "id": 420, "pid": 2338711, "tid": 2379440, "ts": 6274146178224.192, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146178348.764, "dur": 124.407, + "args": { + "External id": 516357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146178522.141, "dur": 41.535, + "args": { + "External id": 516358,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146178581.847, "dur": 93.411, + "args": { + "External id": 516359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146178690.971, "dur": 36.549, + "args": { + "External id": 516360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146178734.062, "dur": 35.071, + "args": { + "External id": 516361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146178778.458, "dur": 28.592, + "args": { + "External id": 516362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146178813.867, "dur": 30.096, + "args": { + "External id": 516363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146178873.414, "dur": 28.609, + "args": { + "External id": 516364,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146178923.224, "dur": 32.134, + "args": { + "External id": 516365,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146178977.962, "dur": 21.770, + "args": { + "External id": 516366,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146179016.617, "dur": 16.843, + "args": { + "External id": 516367,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179042.780, "dur": 97.546, + "args": { + "External id": 516368,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179150.140, "dur": 41.637, + "args": { + "External id": 516369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146179228.660, "dur": 283.231, + "args": { + "External id": 516370,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146179316.589, "dur": 9.385, + "args": { + "External id": 516371,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146179328.394, "dur": 2.947, + "args": { + "External id": 516372,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146179332.908, "dur": 2.513, + "args": { + "External id": 516373,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146179336.626, "dur": 2.376, + "args": { + "External id": 516374,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146179388.763, "dur": 6.015, + "args": { + "External id": 516375,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146179391.433, "dur": 3.125, + "args": { + "External id": 516376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146179396.926, "dur": 35.194, + "args": { + "External id": 516377,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146179402.728, "dur": 1.781, + "args": { + "External id": 516378,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146179433.610, "dur": 2.165, + "args": { + "External id": 516379,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146179435.102, "dur": 0.601, + "args": { + "External id": 516380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146179437.197, "dur": 15.889, + "args": { + "External id": 516381,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146179439.593, "dur": 0.533, + "args": { + "External id": 516382,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146179554.042, "dur": 57.119, + "args": { + "External id": 516383,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146179649.497, "dur": 26.830, + "args": { + "External id": 516384,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179690.108, "dur": 61.354, + "args": { + "External id": 516385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179759.612, "dur": 42.438, + "args": { + "External id": 516386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179814.918, "dur": 21.160, + "args": { + "External id": 516387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179843.841, "dur": 32.830, + "args": { + "External id": 516388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179885.025, "dur": 30.281, + "args": { + "External id": 516389,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146179923.232, "dur": 31.108, + "args": { + "External id": 516390,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146179974.435, "dur": 25.423, + "args": { + "External id": 516391,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146180016.354, "dur": 25.084, + "args": { + "External id": 516392,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146180089.942, "dur": 23.276, + "args": { + "External id": 516393,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146180132.807, "dur": 15.078, + "args": { + "External id": 516394,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146180165.681, "dur": 18.420, + "args": { + "External id": 516395,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180274.205, "dur": 18.046, + "args": { + "External id": 516396,"Record function id": 0, "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180278.311, "dur": 12.866, + "args": { + "External id": 516397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180282.889, "dur": 7.269, + "args": { + "External id": 516398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180285.075, "dur": 4.892, + "args": { + "External id": 516399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180296.475, "dur": 5.103, + "args": { + "External id": 516400,"Record function id": 0, "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180297.696, "dur": 3.444, + "args": { + "External id": 516401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180298.540, "dur": 2.136, + "args": { + "External id": 516402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180299.590, "dur": 0.954, + "args": { + "External id": 516403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180304.869, "dur": 4.930, + "args": { + "External id": 516404,"Record function id": 0, "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180306.167, "dur": 3.218, + "args": { + "External id": 516405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180307.111, "dur": 1.853, + "args": { + "External id": 516406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180307.564, "dur": 1.332, + "args": { + "External id": 516407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180313.010, "dur": 3.964, + "args": { + "External id": 516408,"Record function id": 0, "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180314.169, "dur": 2.420, + "args": { + "External id": 516409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180314.842, "dur": 1.337, + "args": { + "External id": 516410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180315.409, "dur": 0.709, + "args": { + "External id": 516411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180320.026, "dur": 6.330, + "args": { + "External id": 516412,"Record function id": 0, "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180321.394, "dur": 4.533, + "args": { + "External id": 516413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180321.971, "dur": 3.393, + "args": { + "External id": 516414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180322.414, "dur": 2.870, + "args": { + "External id": 516415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180329.449, "dur": 4.361, + "args": { + "External id": 516416,"Record function id": 0, "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180330.784, "dur": 2.583, + "args": { + "External id": 516417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180331.255, "dur": 1.645, + "args": { + "External id": 516418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180332.060, "dur": 0.734, + "args": { + "External id": 516419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180336.996, "dur": 3.900, + "args": { + "External id": 516420,"Record function id": 0, "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180338.210, "dur": 2.255, + "args": { + "External id": 516421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180338.871, "dur": 1.179, + "args": { + "External id": 516422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180339.229, "dur": 0.729, + "args": { + "External id": 516423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180343.894, "dur": 4.322, + "args": { + "External id": 516424,"Record function id": 0, "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180345.064, "dur": 2.734, + "args": { + "External id": 516425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180345.887, "dur": 1.457, + "args": { + "External id": 516426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180346.389, "dur": 0.843, + "args": { + "External id": 516427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180351.640, "dur": 3.763, + "args": { + "External id": 516428,"Record function id": 0, "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146180352.729, "dur": 2.262, + "args": { + "External id": 516429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180353.303, "dur": 1.249, + "args": { + "External id": 516430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146180353.798, "dur": 0.625, + "args": { + "External id": 516431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146180359.987, "dur": 66459.060, + "args": { + "External id": 516432,"Record function id": 0, "Sequence number": 5516266, "Fwd thread id": 1, "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146180361.618, "dur": 66446.294, + "args": { + "External id": 516433,"Sequence number": 5516266, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9552 + } + }, + { + "ph": "f", "id": 421, "pid": 2338711, "tid": 2379440, "ts": 6274146180361.618, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274146180396.463, "dur": 43.528, + "args": { + "External id": 516434,"Record function id": 0, "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274146180447.792, "dur": 66.098, + "args": { + "External id": 516435,"Record function id": 0, "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338711, "tid": 2379440, + "ts": 6274146180520.117, "dur": 66278.968, + "args": { + "External id": 516436,"Record function id": 0, "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146180641.949, "dur": 11.207, + "args": { + "External id": 516437,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146180667.372, "dur": 6.351, + "args": { + "External id": 516438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146180693.956, "dur": 65064.408, + "args": { + "External id": 516439,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146180709.865, "dur": 65033.632, + "args": { + "External id": 516440,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146180809.840, "dur": 20.445, + "args": { + "External id": 516441,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146180851.084, "dur": 64838.587, + "args": { + "External id": 516442,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146180854.566, "dur": 64833.787, + "args": { + "External id": 516443,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146180860.309, "dur": 10.290, + "args": { + "External id": 516444,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146180874.749, "dur": 64806.930, + "args": { + "External id": 516445,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146245878.563, "dur": 13.697, + "args": { + "External id": 516446,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146245882.622, "dur": 9.268, + "args": { + "External id": 516447,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146245927.108, "dur": 415.145, + "args": { + "External id": 516448,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146245958.529, "dur": 378.034, + "args": { + "External id": 516449,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9568, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146245971.942, "dur": 357.240, + "args": { + "External id": 516450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146246366.283, "dur": 2.604, + "args": { + "External id": 516451,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9570, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246435.381, "dur": 9.575, + "args": { + "External id": 516452,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246458.554, "dur": 37.156, + "args": { + "External id": 516453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246507.043, "dur": 1.596, + "args": { + "External id": 516454,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246514.060, "dur": 11.856, + "args": { + "External id": 516455,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246532.501, "dur": 0.947, + "args": { + "External id": 516456,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246538.086, "dur": 11.097, + "args": { + "External id": 516457,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246554.548, "dur": 0.846, + "args": { + "External id": 516458,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246559.623, "dur": 10.521, + "args": { + "External id": 516459,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246574.856, "dur": 0.771, + "args": { + "External id": 516460,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246580.036, "dur": 32.315, + "args": { + "External id": 516461,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246626.137, "dur": 1.759, + "args": { + "External id": 516462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246634.633, "dur": 16.759, + "args": { + "External id": 516463,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246658.201, "dur": 0.953, + "args": { + "External id": 516464,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246667.715, "dur": 16.842, + "args": { + "External id": 516465,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246689.934, "dur": 0.936, + "args": { + "External id": 516466,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246694.938, "dur": 10.755, + "args": { + "External id": 516467,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146246710.320, "dur": 3.309, + "args": { + "External id": 516468,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146246718.005, "dur": 11.224, + "args": { + "External id": 516469,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146246837.347, "dur": 3166.163, + "args": { + "External id": 516470,"Record function id": 0, "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274146246859.245, "dur": 1158.288, + "args": { + "External id": 516471,"Record function id": 0, "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274146246877.222, "dur": 383.306, + "args": { + "External id": 516472,"Record function id": 0, "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246962.514, "dur": 4.808, + "args": { + "External id": 516473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246970.714, "dur": 0.980, + "args": { + "External id": 516474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246974.012, "dur": 0.773, + "args": { + "External id": 516475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246976.736, "dur": 0.959, + "args": { + "External id": 516476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246981.695, "dur": 0.673, + "args": { + "External id": 516477,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246984.238, "dur": 0.810, + "args": { + "External id": 516478,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246986.385, "dur": 0.924, + "args": { + "External id": 516479,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246988.638, "dur": 4.899, + "args": { + "External id": 516480,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246997.072, "dur": 0.693, + "args": { + "External id": 516481,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146246999.025, "dur": 0.922, + "args": { + "External id": 516482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146247018.627, "dur": 204.230, + "args": { + "External id": 516483,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146247036.642, "dur": 180.103, + "args": { + "External id": 516484,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146247093.168, "dur": 16.137, + "args": { + "External id": 516485,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146247113.850, "dur": 73.108, + "args": { + "External id": 516486,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146247116.890, "dur": 69.700, + "args": { + "External id": 516487,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247121.654, "dur": 6.762, + "args": { + "External id": 516488,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146247130.418, "dur": 55.396, + "args": { + "External id": 516489,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338711, "tid": 2379440, + "ts": 6274146247348.351, "dur": 661.376, + "args": { + "External id": 516490,"Record function id": 0, "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274146247366.444, "dur": 629.545, + "args": { + "External id": 516491,"Record function id": 0, "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146247426.501, "dur": 5.330, + "args": { + "External id": 516492,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146247448.164, "dur": 30.256, + "args": { + "External id": 516493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247453.731, "dur": 1.774, + "args": { + "External id": 516494,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247457.510, "dur": 0.619, + "args": { + "External id": 516495,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247460.117, "dur": 2.509, + "args": { + "External id": 516496,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247464.376, "dur": 0.305, + "args": { + "External id": 516497,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247465.835, "dur": 0.666, + "args": { + "External id": 516498,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247468.244, "dur": 0.449, + "args": { + "External id": 516499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247470.248, "dur": 0.372, + "args": { + "External id": 516500,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247471.602, "dur": 0.345, + "args": { + "External id": 516501,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247473.782, "dur": 0.515, + "args": { + "External id": 516502,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146247490.094, "dur": 42.840, + "args": { + "External id": 516503,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146247564.743, "dur": 170.303, + "args": { + "External id": 516504,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146247575.279, "dur": 3.150, + "args": { + "External id": 516505,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146247583.491, "dur": 36.322, + "args": { + "External id": 516506,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146247607.599, "dur": 11.609, + "args": { + "External id": 516507,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247613.402, "dur": 3.480, + "args": { + "External id": 516508,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146247630.913, "dur": 33.745, + "args": { + "External id": 516509,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247633.996, "dur": 0.700, + "args": { + "External id": 516510,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247637.142, "dur": 0.562, + "args": { + "External id": 516511,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247640.506, "dur": 0.629, + "args": { + "External id": 516512,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247643.506, "dur": 0.450, + "args": { + "External id": 516513,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247646.059, "dur": 1.104, + "args": { + "External id": 516514,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247649.520, "dur": 0.766, + "args": { + "External id": 516515,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247652.403, "dur": 0.506, + "args": { + "External id": 516516,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247654.781, "dur": 3.287, + "args": { + "External id": 516517,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146247660.439, "dur": 0.570, + "args": { + "External id": 516518,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146247684.287, "dur": 42.332, + "args": { + "External id": 516519,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146247788.716, "dur": 123.227, + "args": { + "External id": 516520,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146247816.315, "dur": 91.984, + "args": { + "External id": 516521,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9640, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146247827.706, "dur": 76.285, + "args": { + "External id": 516522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146247933.466, "dur": 1.966, + "args": { + "External id": 516523,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9642, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146248025.654, "dur": 1956.090, + "args": { + "External id": 516524,"Sequence number": 5516265, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9643 + } + }, + { + "ph": "f", "id": 422, "pid": 2338711, "tid": 2379440, "ts": 6274146248025.654, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248188.390, "dur": 115.866, + "args": { + "External id": 516525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146248350.440, "dur": 42.262, + "args": { + "External id": 516526,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248413.221, "dur": 50.439, + "args": { + "External id": 516527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248474.575, "dur": 33.355, + "args": { + "External id": 516528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248514.391, "dur": 33.472, + "args": { + "External id": 516529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248554.388, "dur": 28.522, + "args": { + "External id": 516530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248615.189, "dur": 46.594, + "args": { + "External id": 516531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146248697.521, "dur": 29.232, + "args": { + "External id": 516532,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146248747.445, "dur": 32.485, + "args": { + "External id": 516533,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146248802.798, "dur": 21.800, + "args": { + "External id": 516534,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146248839.351, "dur": 16.702, + "args": { + "External id": 516535,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248868.223, "dur": 40.685, + "args": { + "External id": 516536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146248912.454, "dur": 34.612, + "args": { + "External id": 516537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146248980.519, "dur": 304.429, + "args": { + "External id": 516538,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146249103.840, "dur": 8.235, + "args": { + "External id": 516539,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146249114.892, "dur": 3.134, + "args": { + "External id": 516540,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146249119.479, "dur": 5.046, + "args": { + "External id": 516541,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146249125.756, "dur": 2.384, + "args": { + "External id": 516542,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146249177.423, "dur": 5.609, + "args": { + "External id": 516543,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146249179.691, "dur": 3.134, + "args": { + "External id": 516544,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146249185.123, "dur": 33.600, + "args": { + "External id": 516545,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146249191.135, "dur": 1.968, + "args": { + "External id": 516546,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146249220.149, "dur": 1.964, + "args": { + "External id": 516547,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146249221.301, "dur": 0.678, + "args": { + "External id": 516548,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146249223.463, "dur": 14.422, + "args": { + "External id": 516549,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146249226.154, "dur": 0.500, + "args": { + "External id": 516550,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146249331.141, "dur": 29.827, + "args": { + "External id": 516551,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146249380.966, "dur": 18.602, + "args": { + "External id": 516552,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146249408.263, "dur": 64.523, + "args": { + "External id": 516553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146249486.115, "dur": 45.544, + "args": { + "External id": 516554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146249546.053, "dur": 21.119, + "args": { + "External id": 516555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146249576.271, "dur": 59.867, + "args": { + "External id": 516556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146249651.524, "dur": 40.554, + "args": { + "External id": 516557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146249700.805, "dur": 36.041, + "args": { + "External id": 516558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146249760.806, "dur": 29.169, + "args": { + "External id": 516559,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146249806.980, "dur": 27.620, + "args": { + "External id": 516560,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146249849.185, "dur": 19.999, + "args": { + "External id": 516561,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146249894.397, "dur": 16.481, + "args": { + "External id": 516562,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146249925.304, "dur": 19.809, + "args": { + "External id": 516563,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250028.160, "dur": 16.931, + "args": { + "External id": 516564,"Record function id": 0, "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250031.859, "dur": 12.197, + "args": { + "External id": 516565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250036.340, "dur": 6.828, + "args": { + "External id": 516566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250038.453, "dur": 4.595, + "args": { + "External id": 516567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250048.736, "dur": 5.019, + "args": { + "External id": 516568,"Record function id": 0, "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250050.204, "dur": 3.145, + "args": { + "External id": 516569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250050.888, "dur": 1.971, + "args": { + "External id": 516570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250051.824, "dur": 0.970, + "args": { + "External id": 516571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250096.395, "dur": 7.992, + "args": { + "External id": 516572,"Record function id": 0, "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250098.879, "dur": 4.733, + "args": { + "External id": 516573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250100.053, "dur": 2.675, + "args": { + "External id": 516574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250100.787, "dur": 1.733, + "args": { + "External id": 516575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250108.168, "dur": 3.905, + "args": { + "External id": 516576,"Record function id": 0, "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250109.235, "dur": 2.418, + "args": { + "External id": 516577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250109.863, "dur": 1.324, + "args": { + "External id": 516578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250110.404, "dur": 0.722, + "args": { + "External id": 516579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250115.179, "dur": 3.745, + "args": { + "External id": 516580,"Record function id": 0, "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250116.272, "dur": 2.259, + "args": { + "External id": 516581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250116.888, "dur": 1.183, + "args": { + "External id": 516582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250117.322, "dur": 0.665, + "args": { + "External id": 516583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250121.988, "dur": 6.622, + "args": { + "External id": 516584,"Record function id": 0, "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250123.300, "dur": 4.876, + "args": { + "External id": 516585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250123.924, "dur": 3.794, + "args": { + "External id": 516586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250124.353, "dur": 3.255, + "args": { + "External id": 516587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250131.769, "dur": 3.700, + "args": { + "External id": 516588,"Record function id": 0, "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250132.764, "dur": 2.311, + "args": { + "External id": 516589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250133.244, "dur": 1.384, + "args": { + "External id": 516590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250133.805, "dur": 0.737, + "args": { + "External id": 516591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250139.017, "dur": 3.768, + "args": { + "External id": 516592,"Record function id": 0, "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250140.072, "dur": 2.274, + "args": { + "External id": 516593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250140.701, "dur": 1.191, + "args": { + "External id": 516594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250141.035, "dur": 0.767, + "args": { + "External id": 516595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250146.129, "dur": 4.160, + "args": { + "External id": 516596,"Record function id": 0, "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146250147.057, "dur": 2.812, + "args": { + "External id": 516597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250147.635, "dur": 1.801, + "args": { + "External id": 516598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146250148.324, "dur": 1.003, + "args": { + "External id": 516599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146250154.455, "dur": 64144.687, + "args": { + "External id": 516600,"Record function id": 0, "Sequence number": 5516264, "Fwd thread id": 1, "Ev Idx": 9719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146250155.830, "dur": 64133.074, + "args": { + "External id": 516601,"Sequence number": 5516264, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9720 + } + }, + { + "ph": "f", "id": 423, "pid": 2338711, "tid": 2379440, "ts": 6274146250155.830, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274146250192.209, "dur": 42.602, + "args": { + "External id": 516602,"Record function id": 0, "Ev Idx": 9721 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274146250243.447, "dur": 66.761, + "args": { + "External id": 516603,"Record function id": 0, "Ev Idx": 9722 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338711, "tid": 2379440, + "ts": 6274146250316.713, "dur": 63963.573, + "args": { + "External id": 516604,"Record function id": 0, "Ev Idx": 9723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146250414.604, "dur": 8.363, + "args": { + "External id": 516605,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146250433.315, "dur": 5.305, + "args": { + "External id": 516606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146250455.241, "dur": 62718.479, + "args": { + "External id": 516607,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146250470.875, "dur": 62688.105, + "args": { + "External id": 516608,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146250609.627, "dur": 28.854, + "args": { + "External id": 516609,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146250668.909, "dur": 62435.684, + "args": { + "External id": 516610,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146250672.759, "dur": 62430.418, + "args": { + "External id": 516611,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146250679.930, "dur": 14.442, + "args": { + "External id": 516612,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146250696.781, "dur": 62399.334, + "args": { + "External id": 516613,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146313297.065, "dur": 13.352, + "args": { + "External id": 516614,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146313300.925, "dur": 9.039, + "args": { + "External id": 516615,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146313345.666, "dur": 474.905, + "args": { + "External id": 516616,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146313384.695, "dur": 429.425, + "args": { + "External id": 516617,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9736, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146313396.972, "dur": 410.327, + "args": { + "External id": 516618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146313849.439, "dur": 2.983, + "args": { + "External id": 516619,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9738, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146313927.187, "dur": 8.030, + "args": { + "External id": 516620,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146313949.361, "dur": 40.697, + "args": { + "External id": 516621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314001.214, "dur": 3.846, + "args": { + "External id": 516622,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314011.271, "dur": 12.163, + "args": { + "External id": 516623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314029.434, "dur": 1.112, + "args": { + "External id": 516624,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314034.769, "dur": 11.844, + "args": { + "External id": 516625,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314051.869, "dur": 34.471, + "args": { + "External id": 516626,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314093.459, "dur": 14.573, + "args": { + "External id": 516627,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314114.385, "dur": 0.996, + "args": { + "External id": 516628,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314119.422, "dur": 11.424, + "args": { + "External id": 516629,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314135.362, "dur": 1.033, + "args": { + "External id": 516630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314140.771, "dur": 10.471, + "args": { + "External id": 516631,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314155.821, "dur": 0.748, + "args": { + "External id": 516632,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314160.148, "dur": 11.142, + "args": { + "External id": 516633,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314178.353, "dur": 0.926, + "args": { + "External id": 516634,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314183.445, "dur": 10.603, + "args": { + "External id": 516635,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314198.580, "dur": 0.916, + "args": { + "External id": 516636,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314202.845, "dur": 11.049, + "args": { + "External id": 516637,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146314317.529, "dur": 3221.628, + "args": { + "External id": 516638,"Record function id": 0, "Ev Idx": 9757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274146314339.858, "dur": 1181.863, + "args": { + "External id": 516639,"Record function id": 0, "Ev Idx": 9758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274146314358.340, "dur": 390.773, + "args": { + "External id": 516640,"Record function id": 0, "Ev Idx": 9759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314442.437, "dur": 7.216, + "args": { + "External id": 516641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314453.365, "dur": 0.986, + "args": { + "External id": 516642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314456.536, "dur": 1.098, + "args": { + "External id": 516643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314459.314, "dur": 0.761, + "args": { + "External id": 516644,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314462.634, "dur": 0.870, + "args": { + "External id": 516645,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314465.003, "dur": 0.912, + "args": { + "External id": 516646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314469.964, "dur": 0.843, + "args": { + "External id": 516647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314472.783, "dur": 2.314, + "args": { + "External id": 516648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314476.595, "dur": 2.495, + "args": { + "External id": 516649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146314480.492, "dur": 0.683, + "args": { + "External id": 516650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146314508.675, "dur": 199.388, + "args": { + "External id": 516651,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146314526.827, "dur": 174.698, + "args": { + "External id": 516652,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146314545.246, "dur": 15.221, + "args": { + "External id": 516653,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146314564.623, "dur": 100.737, + "args": { + "External id": 516654,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146314567.613, "dur": 97.306, + "args": { + "External id": 516655,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314572.100, "dur": 5.885, + "args": { + "External id": 516656,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314580.132, "dur": 82.930, + "args": { + "External id": 516657,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338711, "tid": 2379440, + "ts": 6274146314843.014, "dur": 670.425, + "args": { + "External id": 516658,"Record function id": 0, "Ev Idx": 9777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274146314863.347, "dur": 636.592, + "args": { + "External id": 516659,"Record function id": 0, "Ev Idx": 9778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146314930.669, "dur": 6.762, + "args": { + "External id": 516660,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146314953.636, "dur": 32.009, + "args": { + "External id": 516661,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314959.449, "dur": 1.896, + "args": { + "External id": 516662,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314963.708, "dur": 0.952, + "args": { + "External id": 516663,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314966.232, "dur": 0.661, + "args": { + "External id": 516664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314968.783, "dur": 2.476, + "args": { + "External id": 516665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314973.088, "dur": 0.436, + "args": { + "External id": 516666,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314974.741, "dur": 0.582, + "args": { + "External id": 516667,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314977.431, "dur": 0.488, + "args": { + "External id": 516668,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314979.241, "dur": 0.240, + "args": { + "External id": 516669,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146314980.793, "dur": 0.454, + "args": { + "External id": 516670,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146314998.082, "dur": 46.235, + "args": { + "External id": 516671,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146315120.276, "dur": 124.533, + "args": { + "External id": 516672,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146315132.596, "dur": 5.093, + "args": { + "External id": 516673,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146315143.349, "dur": 12.014, + "args": { + "External id": 516674,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146315148.364, "dur": 6.534, + "args": { + "External id": 516675,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315152.294, "dur": 0.858, + "args": { + "External id": 516676,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146315163.013, "dur": 25.826, + "args": { + "External id": 516677,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315165.094, "dur": 2.829, + "args": { + "External id": 516678,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315169.649, "dur": 0.776, + "args": { + "External id": 516679,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315172.104, "dur": 0.378, + "args": { + "External id": 516680,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315173.800, "dur": 0.590, + "args": { + "External id": 516681,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315175.925, "dur": 0.387, + "args": { + "External id": 516682,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315178.102, "dur": 0.352, + "args": { + "External id": 516683,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315180.222, "dur": 0.521, + "args": { + "External id": 516684,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315181.610, "dur": 0.382, + "args": { + "External id": 516685,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146315183.988, "dur": 2.280, + "args": { + "External id": 516686,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146315200.998, "dur": 35.964, + "args": { + "External id": 516687,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146315296.949, "dur": 128.724, + "args": { + "External id": 516688,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146315328.106, "dur": 93.950, + "args": { + "External id": 516689,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9808, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146315338.791, "dur": 78.987, + "args": { + "External id": 516690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146315443.102, "dur": 2.175, + "args": { + "External id": 516691,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9810, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146315529.457, "dur": 1988.035, + "args": { + "External id": 516692,"Sequence number": 5516263, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9811 + } + }, + { + "ph": "f", "id": 424, "pid": 2338711, "tid": 2379440, "ts": 6274146315529.457, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146315680.907, "dur": 129.413, + "args": { + "External id": 516693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146315859.998, "dur": 44.467, + "args": { + "External id": 516694,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146315925.043, "dur": 52.420, + "args": { + "External id": 516695,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146315986.721, "dur": 32.765, + "args": { + "External id": 516696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146316026.353, "dur": 82.156, + "args": { + "External id": 516697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146316122.001, "dur": 35.843, + "args": { + "External id": 516698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146316168.488, "dur": 32.870, + "args": { + "External id": 516699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146316228.455, "dur": 28.676, + "args": { + "External id": 516700,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146316275.614, "dur": 34.216, + "args": { + "External id": 516701,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146316332.612, "dur": 21.526, + "args": { + "External id": 516702,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146316369.653, "dur": 17.127, + "args": { + "External id": 516703,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146316398.614, "dur": 37.902, + "args": { + "External id": 516704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146316440.446, "dur": 34.903, + "args": { + "External id": 516705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146316511.247, "dur": 343.131, + "args": { + "External id": 516706,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146316622.636, "dur": 9.279, + "args": { + "External id": 516707,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146316635.088, "dur": 3.756, + "args": { + "External id": 516708,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146316641.007, "dur": 3.566, + "args": { + "External id": 516709,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146316646.498, "dur": 6.083, + "args": { + "External id": 516710,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146316714.711, "dur": 6.204, + "args": { + "External id": 516711,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146316717.226, "dur": 3.437, + "args": { + "External id": 516712,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146316722.787, "dur": 36.020, + "args": { + "External id": 516713,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146316729.066, "dur": 2.150, + "args": { + "External id": 516714,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146316760.491, "dur": 10.527, + "args": { + "External id": 516715,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146316770.168, "dur": 0.781, + "args": { + "External id": 516716,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146316772.254, "dur": 16.855, + "args": { + "External id": 516717,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146316774.454, "dur": 0.582, + "args": { + "External id": 516718,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146316903.086, "dur": 36.105, + "args": { + "External id": 516719,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146316960.179, "dur": 18.215, + "args": { + "External id": 516720,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146316987.881, "dur": 59.657, + "args": { + "External id": 516721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146317090.712, "dur": 50.029, + "args": { + "External id": 516722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146317154.314, "dur": 22.685, + "args": { + "External id": 516723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146317185.837, "dur": 33.820, + "args": { + "External id": 516724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146317228.225, "dur": 30.129, + "args": { + "External id": 516725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146317265.620, "dur": 32.717, + "args": { + "External id": 516726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146317318.797, "dur": 26.477, + "args": { + "External id": 516727,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146317362.386, "dur": 25.609, + "args": { + "External id": 516728,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146317402.475, "dur": 16.871, + "args": { + "External id": 516729,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146317433.517, "dur": 15.493, + "args": { + "External id": 516730,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146317461.688, "dur": 20.075, + "args": { + "External id": 516731,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317564.536, "dur": 16.192, + "args": { + "External id": 516732,"Record function id": 0, "Ev Idx": 9851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317567.716, "dur": 12.053, + "args": { + "External id": 516733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317571.985, "dur": 6.698, + "args": { + "External id": 516734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317574.046, "dur": 4.507, + "args": { + "External id": 516735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317584.609, "dur": 28.594, + "args": { + "External id": 516736,"Record function id": 0, "Ev Idx": 9855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317586.270, "dur": 25.380, + "args": { + "External id": 516737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317604.222, "dur": 5.946, + "args": { + "External id": 516738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317606.930, "dur": 2.819, + "args": { + "External id": 516739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317620.379, "dur": 7.598, + "args": { + "External id": 516740,"Record function id": 0, "Ev Idx": 9859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317622.700, "dur": 4.594, + "args": { + "External id": 516741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317624.088, "dur": 2.545, + "args": { + "External id": 516742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317624.876, "dur": 1.578, + "args": { + "External id": 516743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317633.049, "dur": 6.675, + "args": { + "External id": 516744,"Record function id": 0, "Ev Idx": 9863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317634.836, "dur": 4.200, + "args": { + "External id": 516745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317636.238, "dur": 2.030, + "args": { + "External id": 516746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317637.019, "dur": 1.084, + "args": { + "External id": 516747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317644.669, "dur": 6.166, + "args": { + "External id": 516748,"Record function id": 0, "Ev Idx": 9867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317646.399, "dur": 3.731, + "args": { + "External id": 516749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317647.432, "dur": 1.916, + "args": { + "External id": 516750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317648.200, "dur": 0.992, + "args": { + "External id": 516751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317655.379, "dur": 9.380, + "args": { + "External id": 516752,"Record function id": 0, "Ev Idx": 9871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317657.413, "dur": 6.621, + "args": { + "External id": 516753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317658.412, "dur": 4.931, + "args": { + "External id": 516754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317659.392, "dur": 3.771, + "args": { + "External id": 516755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317669.919, "dur": 4.610, + "args": { + "External id": 516756,"Record function id": 0, "Ev Idx": 9875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317671.491, "dur": 2.607, + "args": { + "External id": 516757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317672.310, "dur": 1.292, + "args": { + "External id": 516758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317672.741, "dur": 0.784, + "args": { + "External id": 516759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317677.788, "dur": 4.355, + "args": { + "External id": 516760,"Record function id": 0, "Ev Idx": 9879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317679.219, "dur": 2.508, + "args": { + "External id": 516761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317679.972, "dur": 1.349, + "args": { + "External id": 516762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317680.222, "dur": 1.021, + "args": { + "External id": 516763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317685.660, "dur": 3.846, + "args": { + "External id": 516764,"Record function id": 0, "Ev Idx": 9883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146317686.545, "dur": 2.543, + "args": { + "External id": 516765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317686.943, "dur": 1.567, + "args": { + "External id": 516766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146317687.728, "dur": 0.667, + "args": { + "External id": 516767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146317694.200, "dur": 66325.454, + "args": { + "External id": 516768,"Record function id": 0, "Sequence number": 5516262, "Fwd thread id": 1, "Ev Idx": 9887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146317695.659, "dur": 66312.416, + "args": { + "External id": 516769,"Sequence number": 5516262, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9888 + } + }, + { + "ph": "f", "id": 425, "pid": 2338711, "tid": 2379440, "ts": 6274146317695.659, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274146317734.021, "dur": 44.307, + "args": { + "External id": 516770,"Record function id": 0, "Ev Idx": 9889 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274146317785.954, "dur": 69.888, + "args": { + "External id": 516771,"Record function id": 0, "Ev Idx": 9890 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338711, "tid": 2379440, + "ts": 6274146317861.569, "dur": 66137.652, + "args": { + "External id": 516772,"Record function id": 0, "Ev Idx": 9891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146317961.921, "dur": 7.898, + "args": { + "External id": 516773,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146317980.593, "dur": 5.073, + "args": { + "External id": 516774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146318001.880, "dur": 64948.149, + "args": { + "External id": 516775,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146318016.840, "dur": 64918.628, + "args": { + "External id": 516776,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146318155.952, "dur": 22.353, + "args": { + "External id": 516777,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146318199.961, "dur": 64682.095, + "args": { + "External id": 516778,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146318203.628, "dur": 64677.158, + "args": { + "External id": 516779,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146318209.231, "dur": 11.917, + "args": { + "External id": 516780,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146318223.547, "dur": 64650.259, + "args": { + "External id": 516781,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146383102.494, "dur": 14.958, + "args": { + "External id": 516782,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146383106.763, "dur": 10.078, + "args": { + "External id": 516783,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383153.328, "dur": 394.477, + "args": { + "External id": 516784,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146383190.688, "dur": 352.007, + "args": { + "External id": 516785,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9904, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146383203.402, "dur": 333.023, + "args": { + "External id": 516786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146383572.494, "dur": 2.323, + "args": { + "External id": 516787,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9906, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383671.595, "dur": 9.340, + "args": { + "External id": 516788,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383696.993, "dur": 44.351, + "args": { + "External id": 516789,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383753.732, "dur": 4.711, + "args": { + "External id": 516790,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383763.895, "dur": 12.523, + "args": { + "External id": 516791,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383782.711, "dur": 1.073, + "args": { + "External id": 516792,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383788.513, "dur": 11.064, + "args": { + "External id": 516793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383805.041, "dur": 0.892, + "args": { + "External id": 516794,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383810.407, "dur": 11.109, + "args": { + "External id": 516795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383825.845, "dur": 0.871, + "args": { + "External id": 516796,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383830.853, "dur": 10.680, + "args": { + "External id": 516797,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383845.796, "dur": 1.086, + "args": { + "External id": 516798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383850.938, "dur": 11.009, + "args": { + "External id": 516799,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383866.455, "dur": 0.963, + "args": { + "External id": 516800,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383871.289, "dur": 10.933, + "args": { + "External id": 516801,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383889.321, "dur": 0.904, + "args": { + "External id": 516802,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383894.931, "dur": 10.942, + "args": { + "External id": 516803,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146383909.703, "dur": 0.750, + "args": { + "External id": 516804,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146383917.967, "dur": 11.242, + "args": { + "External id": 516805,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146384039.207, "dur": 3243.948, + "args": { + "External id": 516806,"Record function id": 0, "Ev Idx": 9925 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274146384098.731, "dur": 1190.170, + "args": { + "External id": 516807,"Record function id": 0, "Ev Idx": 9926 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274146384117.315, "dur": 355.990, + "args": { + "External id": 516808,"Record function id": 0, "Ev Idx": 9927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384208.331, "dur": 7.349, + "args": { + "External id": 516809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384219.481, "dur": 0.868, + "args": { + "External id": 516810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384222.590, "dur": 0.955, + "args": { + "External id": 516811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384225.073, "dur": 0.900, + "args": { + "External id": 516812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384229.534, "dur": 0.696, + "args": { + "External id": 516813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384231.815, "dur": 1.101, + "args": { + "External id": 516814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384234.383, "dur": 1.014, + "args": { + "External id": 516815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384236.811, "dur": 2.258, + "args": { + "External id": 516816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384242.896, "dur": 2.787, + "args": { + "External id": 516817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146384247.122, "dur": 0.775, + "args": { + "External id": 516818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146384267.091, "dur": 171.570, + "args": { + "External id": 516819,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146384284.724, "dur": 148.594, + "args": { + "External id": 516820,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146384305.387, "dur": 16.090, + "args": { + "External id": 516821,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146384325.902, "dur": 72.067, + "args": { + "External id": 516822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146384328.832, "dur": 68.639, + "args": { + "External id": 516823,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384333.322, "dur": 5.874, + "args": { + "External id": 516824,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146384341.274, "dur": 55.550, + "args": { + "External id": 516825,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9944 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338711, "tid": 2379440, + "ts": 6274146384564.173, "dur": 716.078, + "args": { + "External id": 516826,"Record function id": 0, "Ev Idx": 9945 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274146384582.719, "dur": 684.504, + "args": { + "External id": 516827,"Record function id": 0, "Ev Idx": 9946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146384679.140, "dur": 9.095, + "args": { + "External id": 516828,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146384707.102, "dur": 32.093, + "args": { + "External id": 516829,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384713.432, "dur": 1.922, + "args": { + "External id": 516830,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384717.437, "dur": 0.788, + "args": { + "External id": 516831,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384720.485, "dur": 0.397, + "args": { + "External id": 516832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384722.398, "dur": 2.610, + "args": { + "External id": 516833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384726.109, "dur": 0.634, + "args": { + "External id": 516834,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384728.535, "dur": 0.655, + "args": { + "External id": 516835,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384730.516, "dur": 0.444, + "args": { + "External id": 516836,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384731.920, "dur": 0.569, + "args": { + "External id": 516837,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384734.336, "dur": 0.516, + "args": { + "External id": 516838,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146384751.458, "dur": 60.823, + "args": { + "External id": 516839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146384845.968, "dur": 121.953, + "args": { + "External id": 516840,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146384861.309, "dur": 3.464, + "args": { + "External id": 516841,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146384870.454, "dur": 10.683, + "args": { + "External id": 516842,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146384875.388, "dur": 5.337, + "args": { + "External id": 516843,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384878.994, "dur": 0.571, + "args": { + "External id": 516844,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146384892.041, "dur": 25.155, + "args": { + "External id": 516845,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384894.215, "dur": 2.630, + "args": { + "External id": 516846,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384898.474, "dur": 0.668, + "args": { + "External id": 516847,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384901.098, "dur": 0.357, + "args": { + "External id": 516848,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384902.884, "dur": 0.618, + "args": { + "External id": 516849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384904.549, "dur": 0.553, + "args": { + "External id": 516850,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384906.509, "dur": 0.514, + "args": { + "External id": 516851,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384908.421, "dur": 0.335, + "args": { + "External id": 516852,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384909.842, "dur": 0.627, + "args": { + "External id": 516853,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146384911.958, "dur": 2.472, + "args": { + "External id": 516854,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146384927.391, "dur": 32.807, + "args": { + "External id": 516855,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146385018.331, "dur": 164.739, + "args": { + "External id": 516856,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146385047.263, "dur": 131.389, + "args": { + "External id": 516857,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9976, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146385094.312, "dur": 80.075, + "args": { + "External id": 516858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146385204.012, "dur": 1.891, + "args": { + "External id": 516859,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9978, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146385296.672, "dur": 1964.573, + "args": { + "External id": 516860,"Sequence number": 5516261, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9979 + } + }, + { + "ph": "f", "id": 426, "pid": 2338711, "tid": 2379440, "ts": 6274146385296.672, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146385415.159, "dur": 115.587, + "args": { + "External id": 516861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146385574.531, "dur": 70.758, + "args": { + "External id": 516862,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146385674.890, "dur": 66.416, + "args": { + "External id": 516863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146385752.757, "dur": 32.662, + "args": { + "External id": 516864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146385792.524, "dur": 34.110, + "args": { + "External id": 516865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146385833.309, "dur": 32.010, + "args": { + "External id": 516866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146385873.983, "dur": 33.087, + "args": { + "External id": 516867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146385935.379, "dur": 28.087, + "args": { + "External id": 516868,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146385984.709, "dur": 33.466, + "args": { + "External id": 516869,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146386039.586, "dur": 64.798, + "args": { + "External id": 516870,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146386124.734, "dur": 20.027, + "args": { + "External id": 516871,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386161.014, "dur": 45.172, + "args": { + "External id": 516872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386210.513, "dur": 35.657, + "args": { + "External id": 516873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146386278.971, "dur": 272.208, + "args": { + "External id": 516874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146386362.586, "dur": 7.434, + "args": { + "External id": 516875,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146386372.265, "dur": 3.130, + "args": { + "External id": 516876,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146386377.002, "dur": 2.129, + "args": { + "External id": 516877,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146386380.580, "dur": 4.554, + "args": { + "External id": 516878,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146386434.761, "dur": 6.050, + "args": { + "External id": 516879,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146386437.345, "dur": 3.237, + "args": { + "External id": 516880,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146386448.667, "dur": 35.888, + "args": { + "External id": 516881,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146386454.685, "dur": 2.135, + "args": { + "External id": 516882,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146386485.954, "dur": 2.366, + "args": { + "External id": 516883,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146386487.629, "dur": 0.613, + "args": { + "External id": 516884,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146386489.121, "dur": 16.988, + "args": { + "External id": 516885,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146386491.314, "dur": 0.593, + "args": { + "External id": 516886,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146386626.342, "dur": 47.689, + "args": { + "External id": 516887,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146386702.916, "dur": 20.454, + "args": { + "External id": 516888,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386736.170, "dur": 61.928, + "args": { + "External id": 516889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386805.882, "dur": 43.422, + "args": { + "External id": 516890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386858.594, "dur": 21.687, + "args": { + "External id": 516891,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386889.211, "dur": 32.419, + "args": { + "External id": 516892,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386930.011, "dur": 30.761, + "args": { + "External id": 516893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146386968.382, "dur": 31.532, + "args": { + "External id": 516894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146387018.284, "dur": 24.494, + "args": { + "External id": 516895,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146387095.852, "dur": 29.297, + "args": { + "External id": 516896,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146387141.989, "dur": 18.030, + "args": { + "External id": 516897,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146387176.571, "dur": 14.454, + "args": { + "External id": 516898,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146387205.107, "dur": 17.858, + "args": { + "External id": 516899,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387307.731, "dur": 17.776, + "args": { + "External id": 516900,"Record function id": 0, "Ev Idx": 10019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387311.668, "dur": 12.842, + "args": { + "External id": 516901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387316.490, "dur": 6.880, + "args": { + "External id": 516902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387318.619, "dur": 4.629, + "args": { + "External id": 516903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387329.448, "dur": 5.642, + "args": { + "External id": 516904,"Record function id": 0, "Ev Idx": 10023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387330.935, "dur": 3.733, + "args": { + "External id": 516905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387331.858, "dur": 2.245, + "args": { + "External id": 516906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387332.952, "dur": 1.006, + "args": { + "External id": 516907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387338.539, "dur": 5.076, + "args": { + "External id": 516908,"Record function id": 0, "Ev Idx": 10027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387340.415, "dur": 2.772, + "args": { + "External id": 516909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387340.933, "dur": 1.668, + "args": { + "External id": 516910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387341.283, "dur": 1.249, + "args": { + "External id": 516911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387346.768, "dur": 3.387, + "args": { + "External id": 516912,"Record function id": 0, "Ev Idx": 10031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387347.655, "dur": 2.053, + "args": { + "External id": 516913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387348.301, "dur": 0.998, + "args": { + "External id": 516914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387348.583, "dur": 0.642, + "args": { + "External id": 516915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387353.343, "dur": 3.725, + "args": { + "External id": 516916,"Record function id": 0, "Ev Idx": 10035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387354.460, "dur": 2.203, + "args": { + "External id": 516917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387354.983, "dur": 1.199, + "args": { + "External id": 516918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387355.365, "dur": 0.741, + "args": { + "External id": 516919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387360.115, "dur": 6.603, + "args": { + "External id": 516920,"Record function id": 0, "Ev Idx": 10039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387361.572, "dur": 4.684, + "args": { + "External id": 516921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387362.110, "dur": 3.672, + "args": { + "External id": 516922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387362.561, "dur": 3.114, + "args": { + "External id": 516923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387369.903, "dur": 3.773, + "args": { + "External id": 516924,"Record function id": 0, "Ev Idx": 10043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387371.140, "dur": 1.955, + "args": { + "External id": 516925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387371.580, "dur": 1.009, + "args": { + "External id": 516926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387371.845, "dur": 0.676, + "args": { + "External id": 516927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387376.645, "dur": 3.970, + "args": { + "External id": 516928,"Record function id": 0, "Ev Idx": 10047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387377.768, "dur": 2.432, + "args": { + "External id": 516929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387378.368, "dur": 1.188, + "args": { + "External id": 516930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387378.813, "dur": 0.650, + "args": { + "External id": 516931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387383.508, "dur": 4.109, + "args": { + "External id": 516932,"Record function id": 0, "Ev Idx": 10051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146387384.628, "dur": 2.583, + "args": { + "External id": 516933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387385.274, "dur": 1.410, + "args": { + "External id": 516934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146387385.895, "dur": 0.663, + "args": { + "External id": 516935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146387391.439, "dur": 65392.252, + "args": { + "External id": 516936,"Record function id": 0, "Sequence number": 5516260, "Fwd thread id": 1, "Ev Idx": 10055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146387392.689, "dur": 65379.892, + "args": { + "External id": 516937,"Sequence number": 5516260, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10056 + } + }, + { + "ph": "f", "id": 427, "pid": 2338711, "tid": 2379440, "ts": 6274146387392.689, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274146387426.863, "dur": 43.271, + "args": { + "External id": 516938,"Record function id": 0, "Ev Idx": 10057 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274146387478.428, "dur": 69.142, + "args": { + "External id": 516939,"Record function id": 0, "Ev Idx": 10058 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338711, "tid": 2379440, + "ts": 6274146387553.830, "dur": 65209.793, + "args": { + "External id": 516940,"Record function id": 0, "Ev Idx": 10059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146387692.277, "dur": 9.641, + "args": { + "External id": 516941,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146387715.647, "dur": 6.044, + "args": { + "External id": 516942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146387738.544, "dur": 63957.282, + "args": { + "External id": 516943,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146387754.592, "dur": 63925.229, + "args": { + "External id": 516944,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146387852.245, "dur": 20.355, + "args": { + "External id": 516945,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146387893.431, "dur": 63726.705, + "args": { + "External id": 516946,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146387896.584, "dur": 63722.059, + "args": { + "External id": 516947,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146387901.739, "dur": 10.047, + "args": { + "External id": 516948,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146387913.639, "dur": 63697.375, + "args": { + "External id": 516949,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146451818.564, "dur": 13.333, + "args": { + "External id": 516950,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146451822.469, "dur": 8.878, + "args": { + "External id": 516951,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146451868.164, "dur": 432.634, + "args": { + "External id": 516952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146451904.202, "dur": 390.116, + "args": { + "External id": 516953,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10072, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146451917.079, "dur": 370.683, + "args": { + "External id": 516954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146452335.789, "dur": 6.391, + "args": { + "External id": 516955,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10074, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452409.213, "dur": 9.523, + "args": { + "External id": 516956,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452432.312, "dur": 35.859, + "args": { + "External id": 516957,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452479.228, "dur": 1.786, + "args": { + "External id": 516958,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452486.423, "dur": 12.360, + "args": { + "External id": 516959,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452504.234, "dur": 1.156, + "args": { + "External id": 516960,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452509.225, "dur": 11.105, + "args": { + "External id": 516961,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452525.931, "dur": 0.766, + "args": { + "External id": 516962,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452530.788, "dur": 11.408, + "args": { + "External id": 516963,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452546.553, "dur": 0.990, + "args": { + "External id": 516964,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452551.354, "dur": 11.290, + "args": { + "External id": 516965,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452567.070, "dur": 1.108, + "args": { + "External id": 516966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452572.019, "dur": 10.906, + "args": { + "External id": 516967,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452604.404, "dur": 2.389, + "args": { + "External id": 516968,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452614.369, "dur": 18.948, + "args": { + "External id": 516969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452641.021, "dur": 1.306, + "args": { + "External id": 516970,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452648.119, "dur": 13.807, + "args": { + "External id": 516971,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146452670.549, "dur": 3.555, + "args": { + "External id": 516972,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146452679.185, "dur": 14.645, + "args": { + "External id": 516973,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146452802.745, "dur": 3152.336, + "args": { + "External id": 516974,"Record function id": 0, "Ev Idx": 10093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274146452825.823, "dur": 1147.797, + "args": { + "External id": 516975,"Record function id": 0, "Ev Idx": 10094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274146452844.290, "dur": 389.249, + "args": { + "External id": 516976,"Record function id": 0, "Ev Idx": 10095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452930.456, "dur": 4.707, + "args": { + "External id": 516977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452939.307, "dur": 0.993, + "args": { + "External id": 516978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452942.208, "dur": 0.709, + "args": { + "External id": 516979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452944.847, "dur": 0.880, + "args": { + "External id": 516980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452947.353, "dur": 0.886, + "args": { + "External id": 516981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452949.670, "dur": 1.004, + "args": { + "External id": 516982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452954.524, "dur": 0.968, + "args": { + "External id": 516983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452956.688, "dur": 4.355, + "args": { + "External id": 516984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452962.478, "dur": 0.856, + "args": { + "External id": 516985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146452964.827, "dur": 0.947, + "args": { + "External id": 516986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146452987.302, "dur": 209.328, + "args": { + "External id": 516987,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146453005.055, "dur": 182.114, + "args": { + "External id": 516988,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146453023.543, "dur": 14.671, + "args": { + "External id": 516989,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146453041.903, "dur": 117.192, + "args": { + "External id": 516990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146453044.948, "dur": 113.794, + "args": { + "External id": 516991,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453049.358, "dur": 44.611, + "args": { + "External id": 516992,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146453096.803, "dur": 61.348, + "args": { + "External id": 516993,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338711, "tid": 2379440, + "ts": 6274146453323.810, "dur": 641.844, + "args": { + "External id": 516994,"Record function id": 0, "Ev Idx": 10113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274146453342.797, "dur": 609.793, + "args": { + "External id": 516995,"Record function id": 0, "Ev Idx": 10114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146453400.977, "dur": 5.646, + "args": { + "External id": 516996,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146453422.340, "dur": 30.754, + "args": { + "External id": 516997,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453428.012, "dur": 1.853, + "args": { + "External id": 516998,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453432.128, "dur": 0.459, + "args": { + "External id": 516999,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453434.057, "dur": 2.764, + "args": { + "External id": 517000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453438.079, "dur": 0.624, + "args": { + "External id": 517001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453440.537, "dur": 0.691, + "args": { + "External id": 517002,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453442.744, "dur": 0.397, + "args": { + "External id": 517003,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453444.338, "dur": 0.605, + "args": { + "External id": 517004,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453446.797, "dur": 0.587, + "args": { + "External id": 517005,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453448.824, "dur": 0.513, + "args": { + "External id": 517006,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146453464.096, "dur": 45.912, + "args": { + "External id": 517007,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146453541.639, "dur": 157.090, + "args": { + "External id": 517008,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146453552.397, "dur": 3.341, + "args": { + "External id": 517009,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146453560.632, "dur": 12.985, + "args": { + "External id": 517010,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146453565.329, "dur": 7.864, + "args": { + "External id": 517011,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453569.299, "dur": 2.496, + "args": { + "External id": 517012,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146453580.020, "dur": 49.787, + "args": { + "External id": 517013,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453581.571, "dur": 0.659, + "args": { + "External id": 517014,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453584.154, "dur": 0.460, + "args": { + "External id": 517015,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453585.988, "dur": 0.537, + "args": { + "External id": 517016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453607.613, "dur": 0.997, + "args": { + "External id": 517017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453611.644, "dur": 0.517, + "args": { + "External id": 517018,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453614.052, "dur": 0.727, + "args": { + "External id": 517019,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453617.021, "dur": 0.753, + "args": { + "External id": 517020,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453620.439, "dur": 2.959, + "args": { + "External id": 517021,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146453625.073, "dur": 0.882, + "args": { + "External id": 517022,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146453644.828, "dur": 45.682, + "args": { + "External id": 517023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146453750.537, "dur": 119.852, + "args": { + "External id": 517024,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146453775.956, "dur": 90.908, + "args": { + "External id": 517025,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10144, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146453788.214, "dur": 74.373, + "args": { + "External id": 517026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146453889.228, "dur": 2.045, + "args": { + "External id": 517027,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10146, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146453981.599, "dur": 1950.027, + "args": { + "External id": 517028,"Sequence number": 5516259, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10147 + } + }, + { + "ph": "f", "id": 428, "pid": 2338711, "tid": 2379440, "ts": 6274146453981.599, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454154.757, "dur": 115.818, + "args": { + "External id": 517029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146454317.880, "dur": 41.105, + "args": { + "External id": 517030,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454379.740, "dur": 49.492, + "args": { + "External id": 517031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454439.507, "dur": 33.897, + "args": { + "External id": 517032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454479.866, "dur": 33.649, + "args": { + "External id": 517033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454520.064, "dur": 29.503, + "args": { + "External id": 517034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454558.528, "dur": 49.808, + "args": { + "External id": 517035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146454645.507, "dur": 33.415, + "args": { + "External id": 517036,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146454700.941, "dur": 31.123, + "args": { + "External id": 517037,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146454753.862, "dur": 19.269, + "args": { + "External id": 517038,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146454788.515, "dur": 15.707, + "args": { + "External id": 517039,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454815.655, "dur": 46.274, + "args": { + "External id": 517040,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146454866.293, "dur": 35.857, + "args": { + "External id": 517041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146454940.270, "dur": 309.060, + "args": { + "External id": 517042,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146455022.950, "dur": 6.985, + "args": { + "External id": 517043,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146455032.361, "dur": 2.727, + "args": { + "External id": 517044,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146455036.923, "dur": 5.431, + "args": { + "External id": 517045,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146455043.442, "dur": 2.535, + "args": { + "External id": 517046,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146455136.395, "dur": 5.910, + "args": { + "External id": 517047,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146455138.487, "dur": 3.354, + "args": { + "External id": 517048,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146455144.361, "dur": 35.715, + "args": { + "External id": 517049,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146455150.636, "dur": 2.084, + "args": { + "External id": 517050,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146455181.719, "dur": 2.222, + "args": { + "External id": 517051,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146455183.037, "dur": 0.814, + "args": { + "External id": 517052,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146455184.854, "dur": 17.318, + "args": { + "External id": 517053,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146455187.100, "dur": 0.796, + "args": { + "External id": 517054,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146455288.068, "dur": 29.767, + "args": { + "External id": 517055,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146455334.668, "dur": 20.254, + "args": { + "External id": 517056,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146455363.779, "dur": 67.572, + "args": { + "External id": 517057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146455445.292, "dur": 46.419, + "args": { + "External id": 517058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146455502.737, "dur": 22.435, + "args": { + "External id": 517059,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146455533.963, "dur": 35.899, + "args": { + "External id": 517060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146455577.512, "dur": 57.280, + "args": { + "External id": 517061,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146455648.054, "dur": 44.399, + "args": { + "External id": 517062,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146455720.908, "dur": 33.863, + "args": { + "External id": 517063,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146455773.843, "dur": 29.555, + "args": { + "External id": 517064,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146455818.219, "dur": 18.129, + "args": { + "External id": 517065,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146455851.051, "dur": 14.833, + "args": { + "External id": 517066,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146455878.544, "dur": 16.821, + "args": { + "External id": 517067,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146455980.315, "dur": 17.493, + "args": { + "External id": 517068,"Record function id": 0, "Ev Idx": 10187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146455983.942, "dur": 12.962, + "args": { + "External id": 517069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146455988.611, "dur": 7.168, + "args": { + "External id": 517070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146455990.817, "dur": 4.805, + "args": { + "External id": 517071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456001.650, "dur": 5.280, + "args": { + "External id": 517072,"Record function id": 0, "Ev Idx": 10191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456003.064, "dur": 3.423, + "args": { + "External id": 517073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456003.845, "dur": 2.146, + "args": { + "External id": 517074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456004.837, "dur": 1.056, + "args": { + "External id": 517075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456010.094, "dur": 4.487, + "args": { + "External id": 517076,"Record function id": 0, "Ev Idx": 10195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456011.409, "dur": 2.776, + "args": { + "External id": 517077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456012.096, "dur": 1.596, + "args": { + "External id": 517078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456012.686, "dur": 0.937, + "args": { + "External id": 517079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456017.655, "dur": 3.703, + "args": { + "External id": 517080,"Record function id": 0, "Ev Idx": 10199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456018.661, "dur": 2.301, + "args": { + "External id": 517081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456019.365, "dur": 1.130, + "args": { + "External id": 517082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456019.692, "dur": 0.737, + "args": { + "External id": 517083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456024.377, "dur": 4.014, + "args": { + "External id": 517084,"Record function id": 0, "Ev Idx": 10203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456025.772, "dur": 2.166, + "args": { + "External id": 517085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456026.231, "dur": 1.281, + "args": { + "External id": 517086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456026.752, "dur": 0.683, + "args": { + "External id": 517087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456031.707, "dur": 4.503, + "args": { + "External id": 517088,"Record function id": 0, "Ev Idx": 10207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456033.024, "dur": 2.761, + "args": { + "External id": 517089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456033.858, "dur": 1.455, + "args": { + "External id": 517090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456034.331, "dur": 0.859, + "args": { + "External id": 517091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456039.368, "dur": 6.607, + "args": { + "External id": 517092,"Record function id": 0, "Ev Idx": 10211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456040.798, "dur": 4.777, + "args": { + "External id": 517093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456041.312, "dur": 3.799, + "args": { + "External id": 517094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456041.730, "dur": 3.298, + "args": { + "External id": 517095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456048.971, "dur": 4.104, + "args": { + "External id": 517096,"Record function id": 0, "Ev Idx": 10215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456050.493, "dur": 2.178, + "args": { + "External id": 517097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456050.965, "dur": 1.234, + "args": { + "External id": 517098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456051.252, "dur": 0.843, + "args": { + "External id": 517099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456091.694, "dur": 8.210, + "args": { + "External id": 517100,"Record function id": 0, "Ev Idx": 10219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146456094.426, "dur": 4.776, + "args": { + "External id": 517101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456095.528, "dur": 2.731, + "args": { + "External id": 517102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146456096.545, "dur": 1.451, + "args": { + "External id": 517103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146456104.864, "dur": 65829.409, + "args": { + "External id": 517104,"Record function id": 0, "Sequence number": 5516258, "Fwd thread id": 1, "Ev Idx": 10223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146456106.308, "dur": 65817.358, + "args": { + "External id": 517105,"Sequence number": 5516258, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10224 + } + }, + { + "ph": "f", "id": 429, "pid": 2338711, "tid": 2379440, "ts": 6274146456106.308, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274146456141.407, "dur": 42.129, + "args": { + "External id": 517106,"Record function id": 0, "Ev Idx": 10225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274146456192.142, "dur": 66.245, + "args": { + "External id": 517107,"Record function id": 0, "Ev Idx": 10226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338711, "tid": 2379440, + "ts": 6274146456265.100, "dur": 65648.934, + "args": { + "External id": 517108,"Record function id": 0, "Ev Idx": 10227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146456360.568, "dur": 7.958, + "args": { + "External id": 517109,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146456379.553, "dur": 5.202, + "args": { + "External id": 517110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146456400.042, "dur": 64392.108, + "args": { + "External id": 517111,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146456414.477, "dur": 64362.734, + "args": { + "External id": 517112,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146456513.370, "dur": 19.412, + "args": { + "External id": 517113,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146456552.696, "dur": 64174.015, + "args": { + "External id": 517114,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146456556.029, "dur": 64169.268, + "args": { + "External id": 517115,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146456560.963, "dur": 10.029, + "args": { + "External id": 517116,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146456573.164, "dur": 64145.209, + "args": { + "External id": 517117,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146520916.395, "dur": 13.648, + "args": { + "External id": 517118,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146520920.361, "dur": 9.284, + "args": { + "External id": 517119,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146520968.377, "dur": 498.599, + "args": { + "External id": 517120,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146521032.506, "dur": 428.437, + "args": { + "External id": 517121,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10240, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146521087.239, "dur": 367.766, + "args": { + "External id": 517122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146521493.768, "dur": 2.451, + "args": { + "External id": 517123,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10242, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521568.278, "dur": 7.746, + "args": { + "External id": 517124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521609.667, "dur": 47.405, + "args": { + "External id": 517125,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521673.289, "dur": 5.768, + "args": { + "External id": 517126,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521687.299, "dur": 12.960, + "args": { + "External id": 517127,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521706.625, "dur": 1.113, + "args": { + "External id": 517128,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521712.531, "dur": 11.897, + "args": { + "External id": 517129,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521730.419, "dur": 1.032, + "args": { + "External id": 517130,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521735.262, "dur": 10.061, + "args": { + "External id": 517131,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521749.888, "dur": 1.080, + "args": { + "External id": 517132,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521754.897, "dur": 10.671, + "args": { + "External id": 517133,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521769.926, "dur": 1.407, + "args": { + "External id": 517134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521775.118, "dur": 9.361, + "args": { + "External id": 517135,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521789.269, "dur": 1.186, + "args": { + "External id": 517136,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521794.506, "dur": 10.322, + "args": { + "External id": 517137,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521809.555, "dur": 0.949, + "args": { + "External id": 517138,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521814.497, "dur": 10.158, + "args": { + "External id": 517139,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146521831.214, "dur": 0.907, + "args": { + "External id": 517140,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146521836.083, "dur": 10.131, + "args": { + "External id": 517141,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146521952.323, "dur": 3262.640, + "args": { + "External id": 517142,"Record function id": 0, "Ev Idx": 10261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274146521974.624, "dur": 1255.472, + "args": { + "External id": 517143,"Record function id": 0, "Ev Idx": 10262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274146521992.072, "dur": 407.123, + "args": { + "External id": 517144,"Record function id": 0, "Ev Idx": 10263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522125.616, "dur": 7.590, + "args": { + "External id": 517145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522138.111, "dur": 0.952, + "args": { + "External id": 517146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522141.104, "dur": 1.005, + "args": { + "External id": 517147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522144.284, "dur": 0.844, + "args": { + "External id": 517148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522146.831, "dur": 1.153, + "args": { + "External id": 517149,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522149.448, "dur": 0.825, + "args": { + "External id": 517150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522152.102, "dur": 0.780, + "args": { + "External id": 517151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522154.180, "dur": 5.749, + "args": { + "External id": 517152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522161.748, "dur": 2.677, + "args": { + "External id": 517153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146522168.195, "dur": 0.760, + "args": { + "External id": 517154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146522189.778, "dur": 174.890, + "args": { + "External id": 517155,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146522207.300, "dur": 152.236, + "args": { + "External id": 517156,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146522231.189, "dur": 15.486, + "args": { + "External id": 517157,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146522250.684, "dur": 77.867, + "args": { + "External id": 517158,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146522254.144, "dur": 74.088, + "args": { + "External id": 517159,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522259.036, "dur": 6.893, + "args": { + "External id": 517160,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146522267.604, "dur": 59.965, + "args": { + "External id": 517161,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338711, "tid": 2379440, + "ts": 6274146522495.936, "dur": 725.663, + "args": { + "External id": 517162,"Record function id": 0, "Ev Idx": 10281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274146522515.275, "dur": 692.508, + "args": { + "External id": 517163,"Record function id": 0, "Ev Idx": 10282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146522577.102, "dur": 5.483, + "args": { + "External id": 517164,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146522620.969, "dur": 45.667, + "args": { + "External id": 517165,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522628.255, "dur": 2.398, + "args": { + "External id": 517166,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522634.566, "dur": 1.006, + "args": { + "External id": 517167,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522638.160, "dur": 0.859, + "args": { + "External id": 517168,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522641.233, "dur": 3.438, + "args": { + "External id": 517169,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522647.402, "dur": 0.945, + "args": { + "External id": 517170,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522651.146, "dur": 0.608, + "args": { + "External id": 517171,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522653.677, "dur": 0.672, + "args": { + "External id": 517172,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522656.998, "dur": 0.674, + "args": { + "External id": 517173,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522660.312, "dur": 0.906, + "args": { + "External id": 517174,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146522690.720, "dur": 57.212, + "args": { + "External id": 517175,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146522783.780, "dur": 117.864, + "args": { + "External id": 517176,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146522795.611, "dur": 4.468, + "args": { + "External id": 517177,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146522805.608, "dur": 11.197, + "args": { + "External id": 517178,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146522810.510, "dur": 5.853, + "args": { + "External id": 517179,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522814.487, "dur": 0.557, + "args": { + "External id": 517180,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146522823.982, "dur": 26.005, + "args": { + "External id": 517181,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522826.095, "dur": 2.945, + "args": { + "External id": 517182,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522830.837, "dur": 0.628, + "args": { + "External id": 517183,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522832.819, "dur": 0.737, + "args": { + "External id": 517184,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522835.382, "dur": 0.429, + "args": { + "External id": 517185,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522837.258, "dur": 0.398, + "args": { + "External id": 517186,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522838.856, "dur": 0.513, + "args": { + "External id": 517187,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522841.086, "dur": 0.383, + "args": { + "External id": 517188,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522843.156, "dur": 0.364, + "args": { + "External id": 517189,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146522844.529, "dur": 2.594, + "args": { + "External id": 517190,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146522861.268, "dur": 32.388, + "args": { + "External id": 517191,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146522950.984, "dur": 169.023, + "args": { + "External id": 517192,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146522980.148, "dur": 135.517, + "args": { + "External id": 517193,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10312, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146522991.162, "dur": 119.463, + "args": { + "External id": 517194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146523142.056, "dur": 2.322, + "args": { + "External id": 517195,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10314, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146523238.404, "dur": 1954.331, + "args": { + "External id": 517196,"Sequence number": 5516257, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10315 + } + }, + { + "ph": "f", "id": 430, "pid": 2338711, "tid": 2379440, "ts": 6274146523238.404, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146523359.127, "dur": 118.019, + "args": { + "External id": 517197,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146523521.753, "dur": 40.001, + "args": { + "External id": 517198,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146523581.720, "dur": 92.062, + "args": { + "External id": 517199,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146523692.277, "dur": 40.546, + "args": { + "External id": 517200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146523740.018, "dur": 34.779, + "args": { + "External id": 517201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146523781.501, "dur": 28.868, + "args": { + "External id": 517202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146523819.459, "dur": 29.519, + "args": { + "External id": 517203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146523879.345, "dur": 26.577, + "args": { + "External id": 517204,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146523926.762, "dur": 30.961, + "args": { + "External id": 517205,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146523979.344, "dur": 19.761, + "args": { + "External id": 517206,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146524012.057, "dur": 18.540, + "args": { + "External id": 517207,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524041.916, "dur": 82.555, + "args": { + "External id": 517208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524131.136, "dur": 39.580, + "args": { + "External id": 517209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146524210.724, "dur": 292.698, + "args": { + "External id": 517210,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146524301.187, "dur": 6.565, + "args": { + "External id": 517211,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146524310.105, "dur": 2.893, + "args": { + "External id": 517212,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146524314.574, "dur": 2.414, + "args": { + "External id": 517213,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146524318.766, "dur": 4.552, + "args": { + "External id": 517214,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146524384.852, "dur": 6.423, + "args": { + "External id": 517215,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146524387.146, "dur": 3.523, + "args": { + "External id": 517216,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146524393.167, "dur": 35.100, + "args": { + "External id": 517217,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146524399.411, "dur": 2.032, + "args": { + "External id": 517218,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146524430.160, "dur": 2.062, + "args": { + "External id": 517219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146524431.345, "dur": 0.797, + "args": { + "External id": 517220,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146524433.216, "dur": 15.126, + "args": { + "External id": 517221,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146524435.500, "dur": 0.683, + "args": { + "External id": 517222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146524545.031, "dur": 28.406, + "args": { + "External id": 517223,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146524610.824, "dur": 28.008, + "args": { + "External id": 517224,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524653.540, "dur": 58.702, + "args": { + "External id": 517225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524721.064, "dur": 41.940, + "args": { + "External id": 517226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524773.627, "dur": 21.302, + "args": { + "External id": 517227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524804.376, "dur": 32.323, + "args": { + "External id": 517228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524845.183, "dur": 30.786, + "args": { + "External id": 517229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146524883.387, "dur": 33.022, + "args": { + "External id": 517230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146524935.870, "dur": 27.088, + "args": { + "External id": 517231,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146524979.351, "dur": 27.485, + "args": { + "External id": 517232,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146525021.574, "dur": 20.551, + "args": { + "External id": 517233,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146525092.376, "dur": 21.307, + "args": { + "External id": 517234,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146525136.699, "dur": 19.907, + "args": { + "External id": 517235,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525240.496, "dur": 17.383, + "args": { + "External id": 517236,"Record function id": 0, "Ev Idx": 10355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525244.350, "dur": 12.437, + "args": { + "External id": 517237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525249.065, "dur": 6.689, + "args": { + "External id": 517238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525250.908, "dur": 4.684, + "args": { + "External id": 517239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525262.112, "dur": 5.282, + "args": { + "External id": 517240,"Record function id": 0, "Ev Idx": 10359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525263.216, "dur": 3.723, + "args": { + "External id": 517241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525263.945, "dur": 2.454, + "args": { + "External id": 517242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525265.188, "dur": 1.102, + "args": { + "External id": 517243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525270.606, "dur": 4.394, + "args": { + "External id": 517244,"Record function id": 0, "Ev Idx": 10363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525271.907, "dur": 2.580, + "args": { + "External id": 517245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525272.397, "dur": 1.645, + "args": { + "External id": 517246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525272.927, "dur": 1.032, + "args": { + "External id": 517247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525278.169, "dur": 4.426, + "args": { + "External id": 517248,"Record function id": 0, "Ev Idx": 10367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525279.226, "dur": 2.913, + "args": { + "External id": 517249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525279.786, "dur": 1.910, + "args": { + "External id": 517250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525280.555, "dur": 1.067, + "args": { + "External id": 517251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525285.605, "dur": 4.158, + "args": { + "External id": 517252,"Record function id": 0, "Ev Idx": 10371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525286.984, "dur": 2.356, + "args": { + "External id": 517253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525287.485, "dur": 1.162, + "args": { + "External id": 517254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525288.032, "dur": 0.538, + "args": { + "External id": 517255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525292.799, "dur": 6.718, + "args": { + "External id": 517256,"Record function id": 0, "Ev Idx": 10375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525293.689, "dur": 5.367, + "args": { + "External id": 517257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525294.421, "dur": 4.158, + "args": { + "External id": 517258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525295.192, "dur": 3.286, + "args": { + "External id": 517259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525302.707, "dur": 4.079, + "args": { + "External id": 517260,"Record function id": 0, "Ev Idx": 10379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525303.982, "dur": 2.398, + "args": { + "External id": 517261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525304.430, "dur": 1.550, + "args": { + "External id": 517262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525305.135, "dur": 0.761, + "args": { + "External id": 517263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525309.787, "dur": 4.000, + "args": { + "External id": 517264,"Record function id": 0, "Ev Idx": 10383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525310.896, "dur": 2.466, + "args": { + "External id": 517265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525311.434, "dur": 1.483, + "args": { + "External id": 517266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525311.707, "dur": 1.120, + "args": { + "External id": 517267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525317.030, "dur": 4.686, + "args": { + "External id": 517268,"Record function id": 0, "Ev Idx": 10387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146525318.590, "dur": 2.622, + "args": { + "External id": 517269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525319.064, "dur": 1.687, + "args": { + "External id": 517270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146525319.974, "dur": 0.666, + "args": { + "External id": 517271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146525325.653, "dur": 64775.227, + "args": { + "External id": 517272,"Record function id": 0, "Sequence number": 5516256, "Fwd thread id": 1, "Ev Idx": 10391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146525327.014, "dur": 64761.305, + "args": { + "External id": 517273,"Sequence number": 5516256, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10392 + } + }, + { + "ph": "f", "id": 431, "pid": 2338711, "tid": 2379440, "ts": 6274146525327.014, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274146525360.825, "dur": 43.291, + "args": { + "External id": 517274,"Record function id": 0, "Ev Idx": 10393 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274146525412.081, "dur": 67.470, + "args": { + "External id": 517275,"Record function id": 0, "Ev Idx": 10394 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338711, "tid": 2379440, + "ts": 6274146525486.323, "dur": 64559.411, + "args": { + "External id": 517276,"Record function id": 0, "Ev Idx": 10395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146525582.932, "dur": 27.587, + "args": { + "External id": 517277,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146525626.921, "dur": 6.346, + "args": { + "External id": 517278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146525654.067, "dur": 63471.778, + "args": { + "External id": 517279,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146525671.795, "dur": 63438.499, + "args": { + "External id": 517280,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146525777.053, "dur": 21.398, + "args": { + "External id": 517281,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146525821.911, "dur": 63205.753, + "args": { + "External id": 517282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146525825.311, "dur": 63201.079, + "args": { + "External id": 517283,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146525830.270, "dur": 9.535, + "args": { + "External id": 517284,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146525841.642, "dur": 63177.879, + "args": { + "External id": 517285,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146589250.423, "dur": 17.330, + "args": { + "External id": 517286,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146589258.175, "dur": 9.202, + "args": { + "External id": 517287,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589301.634, "dur": 348.966, + "args": { + "External id": 517288,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146589340.852, "dur": 303.369, + "args": { + "External id": 517289,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10408, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146589353.898, "dur": 282.423, + "args": { + "External id": 517290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146589678.850, "dur": 2.694, + "args": { + "External id": 517291,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10410, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589748.216, "dur": 7.487, + "args": { + "External id": 517292,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589769.252, "dur": 37.084, + "args": { + "External id": 517293,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589817.097, "dur": 1.938, + "args": { + "External id": 517294,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589824.024, "dur": 12.172, + "args": { + "External id": 517295,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589841.769, "dur": 3.214, + "args": { + "External id": 517296,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589849.679, "dur": 10.839, + "args": { + "External id": 517297,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589865.315, "dur": 0.894, + "args": { + "External id": 517298,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589870.679, "dur": 10.847, + "args": { + "External id": 517299,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589885.911, "dur": 0.853, + "args": { + "External id": 517300,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589890.618, "dur": 11.457, + "args": { + "External id": 517301,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589906.137, "dur": 1.130, + "args": { + "External id": 517302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589911.788, "dur": 10.217, + "args": { + "External id": 517303,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589926.366, "dur": 0.888, + "args": { + "External id": 517304,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589931.084, "dur": 10.842, + "args": { + "External id": 517305,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589948.136, "dur": 0.810, + "args": { + "External id": 517306,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589953.382, "dur": 9.469, + "args": { + "External id": 517307,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146589967.315, "dur": 0.957, + "args": { + "External id": 517308,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146589972.545, "dur": 10.958, + "args": { + "External id": 517309,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146590118.500, "dur": 3241.709, + "args": { + "External id": 517310,"Record function id": 0, "Ev Idx": 10429 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274146590147.656, "dur": 1170.902, + "args": { + "External id": 517311,"Record function id": 0, "Ev Idx": 10430 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274146590167.623, "dur": 357.400, + "args": { + "External id": 517312,"Record function id": 0, "Ev Idx": 10431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590268.826, "dur": 5.348, + "args": { + "External id": 517313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590277.926, "dur": 3.658, + "args": { + "External id": 517314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590283.888, "dur": 0.805, + "args": { + "External id": 517315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590286.406, "dur": 0.867, + "args": { + "External id": 517316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590289.348, "dur": 0.678, + "args": { + "External id": 517317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590291.592, "dur": 0.944, + "args": { + "External id": 517318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590296.387, "dur": 0.917, + "args": { + "External id": 517319,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590298.735, "dur": 2.331, + "args": { + "External id": 517320,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590302.457, "dur": 0.667, + "args": { + "External id": 517321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146590304.434, "dur": 3.378, + "args": { + "External id": 517322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146590329.702, "dur": 164.065, + "args": { + "External id": 517323,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146590348.016, "dur": 140.880, + "args": { + "External id": 517324,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146590368.522, "dur": 15.833, + "args": { + "External id": 517325,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146590388.946, "dur": 67.651, + "args": { + "External id": 517326,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146590392.238, "dur": 64.020, + "args": { + "External id": 517327,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590396.709, "dur": 6.100, + "args": { + "External id": 517328,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146590404.830, "dur": 50.506, + "args": { + "External id": 517329,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338711, "tid": 2379440, + "ts": 6274146590642.433, "dur": 668.127, + "args": { + "External id": 517330,"Record function id": 0, "Ev Idx": 10449 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274146590667.218, "dur": 628.880, + "args": { + "External id": 517331,"Record function id": 0, "Ev Idx": 10450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146590733.341, "dur": 7.573, + "args": { + "External id": 517332,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146590757.906, "dur": 32.854, + "args": { + "External id": 517333,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590764.498, "dur": 2.314, + "args": { + "External id": 517334,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590769.238, "dur": 0.800, + "args": { + "External id": 517335,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590771.769, "dur": 0.735, + "args": { + "External id": 517336,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590773.981, "dur": 0.370, + "args": { + "External id": 517337,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590775.663, "dur": 2.617, + "args": { + "External id": 517338,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590779.811, "dur": 0.405, + "args": { + "External id": 517339,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590782.040, "dur": 0.296, + "args": { + "External id": 517340,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590783.842, "dur": 0.627, + "args": { + "External id": 517341,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590786.070, "dur": 0.431, + "args": { + "External id": 517342,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146590801.845, "dur": 44.795, + "args": { + "External id": 517343,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146590879.303, "dur": 119.438, + "args": { + "External id": 517344,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146590891.474, "dur": 7.049, + "args": { + "External id": 517345,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146590904.033, "dur": 11.798, + "args": { + "External id": 517346,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146590909.460, "dur": 5.965, + "args": { + "External id": 517347,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590913.094, "dur": 1.050, + "args": { + "External id": 517348,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146590922.781, "dur": 24.339, + "args": { + "External id": 517349,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590924.802, "dur": 0.554, + "args": { + "External id": 517350,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590926.596, "dur": 2.496, + "args": { + "External id": 517351,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590931.004, "dur": 0.654, + "args": { + "External id": 517352,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590933.718, "dur": 0.456, + "args": { + "External id": 517353,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590935.315, "dur": 0.499, + "args": { + "External id": 517354,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590937.619, "dur": 0.432, + "args": { + "External id": 517355,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590939.847, "dur": 0.388, + "args": { + "External id": 517356,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590941.593, "dur": 0.418, + "args": { + "External id": 517357,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146590943.747, "dur": 0.679, + "args": { + "External id": 517358,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146590957.033, "dur": 32.917, + "args": { + "External id": 517359,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146591046.998, "dur": 166.743, + "args": { + "External id": 517360,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146591115.442, "dur": 94.193, + "args": { + "External id": 517361,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10480, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146591126.982, "dur": 78.150, + "args": { + "External id": 517362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146591234.396, "dur": 1.660, + "args": { + "External id": 517363,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10482, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146591326.538, "dur": 2010.558, + "args": { + "External id": 517364,"Sequence number": 5516255, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10483 + } + }, + { + "ph": "f", "id": 432, "pid": 2338711, "tid": 2379440, "ts": 6274146591326.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146591444.770, "dur": 116.248, + "args": { + "External id": 517365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146591630.774, "dur": 56.041, + "args": { + "External id": 517366,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146591715.112, "dur": 66.054, + "args": { + "External id": 517367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146591792.932, "dur": 35.361, + "args": { + "External id": 517368,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146591835.181, "dur": 34.713, + "args": { + "External id": 517369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146591876.774, "dur": 30.541, + "args": { + "External id": 517370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146591916.274, "dur": 31.070, + "args": { + "External id": 517371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146591977.436, "dur": 28.397, + "args": { + "External id": 517372,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146592027.813, "dur": 72.701, + "args": { + "External id": 517373,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146592129.362, "dur": 23.550, + "args": { + "External id": 517374,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146592168.857, "dur": 15.997, + "args": { + "External id": 517375,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146592201.438, "dur": 46.040, + "args": { + "External id": 517376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146592251.810, "dur": 37.965, + "args": { + "External id": 517377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146592322.425, "dur": 305.934, + "args": { + "External id": 517378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146592411.959, "dur": 7.077, + "args": { + "External id": 517379,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146592421.402, "dur": 2.682, + "args": { + "External id": 517380,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146592426.113, "dur": 2.729, + "args": { + "External id": 517381,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146592430.185, "dur": 4.654, + "args": { + "External id": 517382,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146592485.372, "dur": 5.912, + "args": { + "External id": 517383,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146592487.790, "dur": 3.301, + "args": { + "External id": 517384,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146592493.314, "dur": 44.425, + "args": { + "External id": 517385,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146592499.972, "dur": 1.850, + "args": { + "External id": 517386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146592539.273, "dur": 2.170, + "args": { + "External id": 517387,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146592540.548, "dur": 0.787, + "args": { + "External id": 517388,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146592542.891, "dur": 15.324, + "args": { + "External id": 517389,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146592544.819, "dur": 0.695, + "args": { + "External id": 517390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146592681.420, "dur": 38.609, + "args": { + "External id": 517391,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146592739.665, "dur": 19.140, + "args": { + "External id": 517392,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146592769.848, "dur": 76.151, + "args": { + "External id": 517393,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146592861.094, "dur": 48.160, + "args": { + "External id": 517394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146592920.156, "dur": 24.637, + "args": { + "External id": 517395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146592953.365, "dur": 32.818, + "args": { + "External id": 517396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146592997.729, "dur": 29.914, + "args": { + "External id": 517397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146593035.248, "dur": 67.722, + "args": { + "External id": 517398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146593130.478, "dur": 29.453, + "args": { + "External id": 517399,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146593178.038, "dur": 26.743, + "args": { + "External id": 517400,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146593218.660, "dur": 18.516, + "args": { + "External id": 517401,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146593253.238, "dur": 14.303, + "args": { + "External id": 517402,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146593280.890, "dur": 18.374, + "args": { + "External id": 517403,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593385.236, "dur": 16.407, + "args": { + "External id": 517404,"Record function id": 0, "Ev Idx": 10523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593388.779, "dur": 11.744, + "args": { + "External id": 517405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593393.516, "dur": 6.074, + "args": { + "External id": 517406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593395.213, "dur": 4.251, + "args": { + "External id": 517407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593405.519, "dur": 5.366, + "args": { + "External id": 517408,"Record function id": 0, "Ev Idx": 10527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593406.904, "dur": 3.437, + "args": { + "External id": 517409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593407.626, "dur": 2.190, + "args": { + "External id": 517410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593408.584, "dur": 1.102, + "args": { + "External id": 517411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593414.057, "dur": 5.020, + "args": { + "External id": 517412,"Record function id": 0, "Ev Idx": 10531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593415.607, "dur": 3.059, + "args": { + "External id": 517413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593416.277, "dur": 1.940, + "args": { + "External id": 517414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593416.930, "dur": 1.203, + "args": { + "External id": 517415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593422.180, "dur": 3.929, + "args": { + "External id": 517416,"Record function id": 0, "Ev Idx": 10535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593423.166, "dur": 2.543, + "args": { + "External id": 517417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593423.808, "dur": 1.484, + "args": { + "External id": 517418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593424.473, "dur": 0.756, + "args": { + "External id": 517419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593429.179, "dur": 4.321, + "args": { + "External id": 517420,"Record function id": 0, "Ev Idx": 10539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593430.416, "dur": 2.620, + "args": { + "External id": 517421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593431.239, "dur": 1.345, + "args": { + "External id": 517422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593431.667, "dur": 0.850, + "args": { + "External id": 517423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593436.727, "dur": 6.676, + "args": { + "External id": 517424,"Record function id": 0, "Ev Idx": 10543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593438.118, "dur": 4.833, + "args": { + "External id": 517425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593438.593, "dur": 3.934, + "args": { + "External id": 517426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593439.147, "dur": 3.280, + "args": { + "External id": 517427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593446.657, "dur": 4.278, + "args": { + "External id": 517428,"Record function id": 0, "Ev Idx": 10547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593447.996, "dur": 2.498, + "args": { + "External id": 517429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593448.647, "dur": 1.395, + "args": { + "External id": 517430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593449.124, "dur": 0.827, + "args": { + "External id": 517431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593454.106, "dur": 4.192, + "args": { + "External id": 517432,"Record function id": 0, "Ev Idx": 10551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593455.400, "dur": 2.466, + "args": { + "External id": 517433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593456.066, "dur": 1.319, + "args": { + "External id": 517434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593456.501, "dur": 0.796, + "args": { + "External id": 517435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593461.640, "dur": 4.034, + "args": { + "External id": 517436,"Record function id": 0, "Ev Idx": 10555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146593462.780, "dur": 2.467, + "args": { + "External id": 517437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593463.242, "dur": 1.486, + "args": { + "External id": 517438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146593463.846, "dur": 0.757, + "args": { + "External id": 517439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146593469.928, "dur": 65493.614, + "args": { + "External id": 517440,"Record function id": 0, "Sequence number": 5516254, "Fwd thread id": 1, "Ev Idx": 10559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146593471.484, "dur": 65481.920, + "args": { + "External id": 517441,"Sequence number": 5516254, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10560 + } + }, + { + "ph": "f", "id": 433, "pid": 2338711, "tid": 2379440, "ts": 6274146593471.484, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274146593506.149, "dur": 42.734, + "args": { + "External id": 517442,"Record function id": 0, "Ev Idx": 10561 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274146593557.063, "dur": 104.366, + "args": { + "External id": 517443,"Record function id": 0, "Ev Idx": 10562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338711, "tid": 2379440, + "ts": 6274146593672.583, "dur": 65272.384, + "args": { + "External id": 517444,"Record function id": 0, "Ev Idx": 10563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146593775.639, "dur": 8.116, + "args": { + "External id": 517445,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146593796.489, "dur": 5.643, + "args": { + "External id": 517446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146593817.525, "dur": 64083.054, + "args": { + "External id": 517447,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146593833.301, "dur": 64052.918, + "args": { + "External id": 517448,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146593945.485, "dur": 21.242, + "args": { + "External id": 517449,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146593987.394, "dur": 63843.328, + "args": { + "External id": 517450,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146593990.628, "dur": 63838.395, + "args": { + "External id": 517451,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146593995.586, "dur": 9.565, + "args": { + "External id": 517452,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146594007.420, "dur": 63814.306, + "args": { + "External id": 517453,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146658032.482, "dur": 13.369, + "args": { + "External id": 517454,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146658036.337, "dur": 9.099, + "args": { + "External id": 517455,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658118.870, "dur": 396.023, + "args": { + "External id": 517456,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146658160.613, "dur": 349.108, + "args": { + "External id": 517457,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10576, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146658174.631, "dur": 329.520, + "args": { + "External id": 517458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146658537.131, "dur": 2.399, + "args": { + "External id": 517459,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10578, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658629.907, "dur": 8.865, + "args": { + "External id": 517460,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658654.574, "dur": 43.710, + "args": { + "External id": 517461,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658709.946, "dur": 4.437, + "args": { + "External id": 517462,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658719.927, "dur": 12.373, + "args": { + "External id": 517463,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658738.703, "dur": 1.078, + "args": { + "External id": 517464,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658744.416, "dur": 11.846, + "args": { + "External id": 517465,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658761.368, "dur": 0.980, + "args": { + "External id": 517466,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658766.638, "dur": 10.251, + "args": { + "External id": 517467,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658781.363, "dur": 0.788, + "args": { + "External id": 517468,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658786.503, "dur": 10.558, + "args": { + "External id": 517469,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658801.371, "dur": 0.894, + "args": { + "External id": 517470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658806.789, "dur": 10.291, + "args": { + "External id": 517471,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658823.524, "dur": 0.924, + "args": { + "External id": 517472,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658828.520, "dur": 10.515, + "args": { + "External id": 517473,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658843.052, "dur": 1.129, + "args": { + "External id": 517474,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658848.067, "dur": 10.111, + "args": { + "External id": 517475,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146658861.996, "dur": 0.845, + "args": { + "External id": 517476,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146658867.014, "dur": 10.587, + "args": { + "External id": 517477,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146658981.811, "dur": 3259.167, + "args": { + "External id": 517478,"Record function id": 0, "Ev Idx": 10597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274146659006.265, "dur": 1232.337, + "args": { + "External id": 517479,"Record function id": 0, "Ev Idx": 10598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274146659025.253, "dur": 385.102, + "args": { + "External id": 517480,"Record function id": 0, "Ev Idx": 10599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659155.526, "dur": 7.799, + "args": { + "External id": 517481,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659167.545, "dur": 0.913, + "args": { + "External id": 517482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659171.254, "dur": 1.073, + "args": { + "External id": 517483,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659174.236, "dur": 0.805, + "args": { + "External id": 517484,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659177.052, "dur": 0.908, + "args": { + "External id": 517485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659179.593, "dur": 0.834, + "args": { + "External id": 517486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659182.197, "dur": 1.010, + "args": { + "External id": 517487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659184.634, "dur": 2.453, + "args": { + "External id": 517488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659191.212, "dur": 3.184, + "args": { + "External id": 517489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146659195.669, "dur": 0.611, + "args": { + "External id": 517490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146659216.508, "dur": 160.112, + "args": { + "External id": 517491,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146659233.580, "dur": 138.255, + "args": { + "External id": 517492,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146659253.494, "dur": 15.715, + "args": { + "External id": 517493,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146659273.145, "dur": 70.257, + "args": { + "External id": 517494,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146659276.493, "dur": 66.592, + "args": { + "External id": 517495,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659281.039, "dur": 6.006, + "args": { + "External id": 517496,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146659289.424, "dur": 53.089, + "args": { + "External id": 517497,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338711, "tid": 2379440, + "ts": 6274146659496.600, "dur": 733.593, + "args": { + "External id": 517498,"Record function id": 0, "Ev Idx": 10617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274146659516.662, "dur": 698.861, + "args": { + "External id": 517499,"Record function id": 0, "Ev Idx": 10618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146659579.025, "dur": 5.492, + "args": { + "External id": 517500,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146659628.880, "dur": 45.684, + "args": { + "External id": 517501,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659636.341, "dur": 2.576, + "args": { + "External id": 517502,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659642.399, "dur": 0.990, + "args": { + "External id": 517503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659646.337, "dur": 1.022, + "args": { + "External id": 517504,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659649.670, "dur": 2.713, + "args": { + "External id": 517505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659654.661, "dur": 0.579, + "args": { + "External id": 517506,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659658.142, "dur": 0.757, + "args": { + "External id": 517507,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659660.954, "dur": 0.841, + "args": { + "External id": 517508,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659664.997, "dur": 0.741, + "args": { + "External id": 517509,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659668.390, "dur": 0.758, + "args": { + "External id": 517510,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146659693.550, "dur": 52.846, + "args": { + "External id": 517511,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146659782.205, "dur": 116.750, + "args": { + "External id": 517512,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146659794.477, "dur": 5.014, + "args": { + "External id": 517513,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146659805.116, "dur": 11.845, + "args": { + "External id": 517514,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146659810.082, "dur": 6.454, + "args": { + "External id": 517515,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659814.205, "dur": 0.979, + "args": { + "External id": 517516,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146659823.799, "dur": 24.976, + "args": { + "External id": 517517,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659826.225, "dur": 2.788, + "args": { + "External id": 517518,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659830.538, "dur": 0.558, + "args": { + "External id": 517519,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659832.249, "dur": 0.649, + "args": { + "External id": 517520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659834.905, "dur": 0.394, + "args": { + "External id": 517521,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659836.826, "dur": 0.415, + "args": { + "External id": 517522,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659838.230, "dur": 0.396, + "args": { + "External id": 517523,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659840.299, "dur": 0.373, + "args": { + "External id": 517524,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659842.169, "dur": 0.255, + "args": { + "External id": 517525,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146659843.328, "dur": 2.664, + "args": { + "External id": 517526,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146659859.799, "dur": 31.607, + "args": { + "External id": 517527,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146659951.333, "dur": 177.528, + "args": { + "External id": 517528,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146659987.609, "dur": 136.745, + "args": { + "External id": 517529,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146659998.066, "dur": 121.252, + "args": { + "External id": 517530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146660151.379, "dur": 2.193, + "args": { + "External id": 517531,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10650, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146660247.306, "dur": 1970.125, + "args": { + "External id": 517532,"Sequence number": 5516253, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10651 + } + }, + { + "ph": "f", "id": 434, "pid": 2338711, "tid": 2379440, "ts": 6274146660247.306, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146660368.482, "dur": 114.553, + "args": { + "External id": 517533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146660527.424, "dur": 42.960, + "args": { + "External id": 517534,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146660610.362, "dur": 80.930, + "args": { + "External id": 517535,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146660706.782, "dur": 35.817, + "args": { + "External id": 517536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146660749.425, "dur": 33.937, + "args": { + "External id": 517537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146660789.771, "dur": 29.567, + "args": { + "External id": 517538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146660827.551, "dur": 30.606, + "args": { + "External id": 517539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146660889.570, "dur": 28.171, + "args": { + "External id": 517540,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146660942.127, "dur": 33.514, + "args": { + "External id": 517541,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146660998.537, "dur": 22.188, + "args": { + "External id": 517542,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146661034.590, "dur": 17.144, + "args": { + "External id": 517543,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661102.405, "dur": 51.014, + "args": { + "External id": 517544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661158.718, "dur": 36.485, + "args": { + "External id": 517545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146661228.834, "dur": 270.118, + "args": { + "External id": 517546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146661315.824, "dur": 7.597, + "args": { + "External id": 517547,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146661326.064, "dur": 3.327, + "args": { + "External id": 517548,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146661330.956, "dur": 2.429, + "args": { + "External id": 517549,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146661334.636, "dur": 5.094, + "args": { + "External id": 517550,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146661390.777, "dur": 5.907, + "args": { + "External id": 517551,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146661393.544, "dur": 2.961, + "args": { + "External id": 517552,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146661398.663, "dur": 34.588, + "args": { + "External id": 517553,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146661404.909, "dur": 1.819, + "args": { + "External id": 517554,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146661434.790, "dur": 1.734, + "args": { + "External id": 517555,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146661435.640, "dur": 0.810, + "args": { + "External id": 517556,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146661437.744, "dur": 17.393, + "args": { + "External id": 517557,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146661439.785, "dur": 0.572, + "args": { + "External id": 517558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146661534.091, "dur": 48.935, + "args": { + "External id": 517559,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146661639.949, "dur": 29.704, + "args": { + "External id": 517560,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661684.152, "dur": 59.967, + "args": { + "External id": 517561,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661751.651, "dur": 48.710, + "args": { + "External id": 517562,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661808.896, "dur": 23.478, + "args": { + "External id": 517563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661841.071, "dur": 34.484, + "args": { + "External id": 517564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661883.465, "dur": 29.706, + "args": { + "External id": 517565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146661920.459, "dur": 31.629, + "args": { + "External id": 517566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146661970.643, "dur": 26.451, + "args": { + "External id": 517567,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146662013.223, "dur": 27.289, + "args": { + "External id": 517568,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146662054.431, "dur": 56.809, + "args": { + "External id": 517569,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146662130.867, "dur": 16.991, + "args": { + "External id": 517570,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146662160.984, "dur": 19.871, + "args": { + "External id": 517571,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662267.171, "dur": 17.588, + "args": { + "External id": 517572,"Record function id": 0, "Ev Idx": 10691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662270.972, "dur": 12.737, + "args": { + "External id": 517573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662275.921, "dur": 6.824, + "args": { + "External id": 517574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662278.111, "dur": 4.508, + "args": { + "External id": 517575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662288.704, "dur": 4.822, + "args": { + "External id": 517576,"Record function id": 0, "Ev Idx": 10695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662290.005, "dur": 3.106, + "args": { + "External id": 517577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662290.744, "dur": 1.902, + "args": { + "External id": 517578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662291.518, "dur": 1.013, + "args": { + "External id": 517579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662296.809, "dur": 4.757, + "args": { + "External id": 517580,"Record function id": 0, "Ev Idx": 10699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662298.314, "dur": 2.758, + "args": { + "External id": 517581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662298.948, "dur": 1.613, + "args": { + "External id": 517582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662299.292, "dur": 1.198, + "args": { + "External id": 517583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662304.739, "dur": 4.392, + "args": { + "External id": 517584,"Record function id": 0, "Ev Idx": 10703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662306.346, "dur": 2.339, + "args": { + "External id": 517585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662306.954, "dur": 1.269, + "args": { + "External id": 517586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662307.551, "dur": 0.600, + "args": { + "External id": 517587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662312.196, "dur": 3.972, + "args": { + "External id": 517588,"Record function id": 0, "Ev Idx": 10707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662313.424, "dur": 2.346, + "args": { + "External id": 517589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662313.949, "dur": 1.262, + "args": { + "External id": 517590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662314.358, "dur": 0.767, + "args": { + "External id": 517591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662319.186, "dur": 3.779, + "args": { + "External id": 517592,"Record function id": 0, "Ev Idx": 10711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662320.178, "dur": 2.356, + "args": { + "External id": 517593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662320.768, "dur": 1.290, + "args": { + "External id": 517594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662321.231, "dur": 0.713, + "args": { + "External id": 517595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662326.118, "dur": 6.293, + "args": { + "External id": 517596,"Record function id": 0, "Ev Idx": 10715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662327.090, "dur": 4.875, + "args": { + "External id": 517597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662327.579, "dur": 3.918, + "args": { + "External id": 517598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662327.892, "dur": 3.512, + "args": { + "External id": 517599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662335.461, "dur": 4.135, + "args": { + "External id": 517600,"Record function id": 0, "Ev Idx": 10719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662336.710, "dur": 2.481, + "args": { + "External id": 517601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662337.229, "dur": 1.371, + "args": { + "External id": 517602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662337.683, "dur": 0.808, + "args": { + "External id": 517603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662343.279, "dur": 3.818, + "args": { + "External id": 517604,"Record function id": 0, "Ev Idx": 10723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146662344.474, "dur": 2.208, + "args": { + "External id": 517605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662344.976, "dur": 1.225, + "args": { + "External id": 517606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146662345.497, "dur": 0.572, + "args": { + "External id": 517607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146662351.391, "dur": 64744.818, + "args": { + "External id": 517608,"Record function id": 0, "Sequence number": 5516252, "Fwd thread id": 1, "Ev Idx": 10727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146662352.975, "dur": 64695.639, + "args": { + "External id": 517609,"Sequence number": 5516252, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10728 + } + }, + { + "ph": "f", "id": 435, "pid": 2338711, "tid": 2379440, "ts": 6274146662352.975, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274146662387.006, "dur": 42.329, + "args": { + "External id": 517610,"Record function id": 0, "Ev Idx": 10729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274146662438.495, "dur": 69.901, + "args": { + "External id": 517611,"Record function id": 0, "Ev Idx": 10730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338711, "tid": 2379440, + "ts": 6274146662515.235, "dur": 64524.166, + "args": { + "External id": 517612,"Record function id": 0, "Ev Idx": 10731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146662639.322, "dur": 10.459, + "args": { + "External id": 517613,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146662664.147, "dur": 6.151, + "args": { + "External id": 517614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146662687.355, "dur": 63052.169, + "args": { + "External id": 517615,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146662702.669, "dur": 63022.424, + "args": { + "External id": 517616,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146662814.958, "dur": 20.421, + "args": { + "External id": 517617,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146662856.158, "dur": 62819.315, + "args": { + "External id": 517618,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146662859.314, "dur": 62814.836, + "args": { + "External id": 517619,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146662864.405, "dur": 9.905, + "args": { + "External id": 517620,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146662876.543, "dur": 62790.707, + "args": { + "External id": 517621,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146725862.654, "dur": 14.630, + "args": { + "External id": 517622,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146725866.654, "dur": 10.270, + "args": { + "External id": 517623,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146725911.353, "dur": 492.450, + "args": { + "External id": 517624,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146725945.910, "dur": 450.853, + "args": { + "External id": 517625,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10744, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146725958.371, "dur": 429.427, + "args": { + "External id": 517626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146726462.235, "dur": 3.045, + "args": { + "External id": 517627,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10746, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726623.442, "dur": 9.389, + "args": { + "External id": 517628,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726665.821, "dur": 63.164, + "args": { + "External id": 517629,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726742.545, "dur": 4.722, + "args": { + "External id": 517630,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726754.700, "dur": 15.527, + "args": { + "External id": 517631,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726777.175, "dur": 1.160, + "args": { + "External id": 517632,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726832.551, "dur": 13.004, + "args": { + "External id": 517633,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726854.150, "dur": 1.127, + "args": { + "External id": 517634,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726859.352, "dur": 10.575, + "args": { + "External id": 517635,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726874.822, "dur": 0.734, + "args": { + "External id": 517636,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726879.409, "dur": 10.365, + "args": { + "External id": 517637,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726893.844, "dur": 0.739, + "args": { + "External id": 517638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726897.624, "dur": 10.181, + "args": { + "External id": 517639,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726914.245, "dur": 0.829, + "args": { + "External id": 517640,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726919.093, "dur": 10.167, + "args": { + "External id": 517641,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726933.149, "dur": 0.770, + "args": { + "External id": 517642,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726938.221, "dur": 10.177, + "args": { + "External id": 517643,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146726952.721, "dur": 0.729, + "args": { + "External id": 517644,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146726958.017, "dur": 10.475, + "args": { + "External id": 517645,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146727117.494, "dur": 3349.697, + "args": { + "External id": 517646,"Record function id": 0, "Ev Idx": 10765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274146727145.535, "dur": 1304.191, + "args": { + "External id": 517647,"Record function id": 0, "Ev Idx": 10766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274146727164.201, "dur": 468.141, + "args": { + "External id": 517648,"Record function id": 0, "Ev Idx": 10767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727261.230, "dur": 7.760, + "args": { + "External id": 517649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727272.625, "dur": 1.076, + "args": { + "External id": 517650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727276.137, "dur": 1.110, + "args": { + "External id": 517651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727279.057, "dur": 0.825, + "args": { + "External id": 517652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727281.950, "dur": 0.673, + "args": { + "External id": 517653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727284.140, "dur": 0.849, + "args": { + "External id": 517654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727286.503, "dur": 0.680, + "args": { + "External id": 517655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727289.043, "dur": 2.141, + "args": { + "External id": 517656,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727294.769, "dur": 2.936, + "args": { + "External id": 517657,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146727298.979, "dur": 0.517, + "args": { + "External id": 517658,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146727319.096, "dur": 250.658, + "args": { + "External id": 517659,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146727336.479, "dur": 226.717, + "args": { + "External id": 517660,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146727355.762, "dur": 16.838, + "args": { + "External id": 517661,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146727377.719, "dur": 156.676, + "args": { + "External id": 517662,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146727383.706, "dur": 150.048, + "args": { + "External id": 517663,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727389.299, "dur": 24.100, + "args": { + "External id": 517664,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146727417.979, "dur": 114.825, + "args": { + "External id": 517665,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338711, "tid": 2379440, + "ts": 6274146727757.576, "dur": 684.410, + "args": { + "External id": 517666,"Record function id": 0, "Ev Idx": 10785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274146727779.375, "dur": 648.999, + "args": { + "External id": 517667,"Record function id": 0, "Ev Idx": 10786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146727850.122, "dur": 8.232, + "args": { + "External id": 517668,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146727876.046, "dur": 35.669, + "args": { + "External id": 517669,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727883.032, "dur": 1.927, + "args": { + "External id": 517670,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727886.591, "dur": 0.755, + "args": { + "External id": 517671,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727889.236, "dur": 0.611, + "args": { + "External id": 517672,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727891.450, "dur": 6.147, + "args": { + "External id": 517673,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727898.667, "dur": 0.583, + "args": { + "External id": 517674,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727901.247, "dur": 0.411, + "args": { + "External id": 517675,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727903.051, "dur": 0.272, + "args": { + "External id": 517676,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727904.326, "dur": 0.413, + "args": { + "External id": 517677,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146727906.384, "dur": 0.531, + "args": { + "External id": 517678,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146727923.692, "dur": 55.131, + "args": { + "External id": 517679,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146728012.151, "dur": 160.798, + "args": { + "External id": 517680,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146728023.835, "dur": 3.468, + "args": { + "External id": 517681,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146728032.442, "dur": 11.431, + "args": { + "External id": 517682,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146728037.412, "dur": 6.049, + "args": { + "External id": 517683,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728041.508, "dur": 0.654, + "args": { + "External id": 517684,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146728050.915, "dur": 65.549, + "args": { + "External id": 517685,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728090.757, "dur": 2.628, + "args": { + "External id": 517686,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728095.798, "dur": 0.454, + "args": { + "External id": 517687,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728098.295, "dur": 0.465, + "args": { + "External id": 517688,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728100.278, "dur": 0.593, + "args": { + "External id": 517689,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728102.137, "dur": 0.476, + "args": { + "External id": 517690,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728104.679, "dur": 0.453, + "args": { + "External id": 517691,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728106.904, "dur": 0.335, + "args": { + "External id": 517692,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728108.435, "dur": 0.362, + "args": { + "External id": 517693,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146728111.052, "dur": 2.427, + "args": { + "External id": 517694,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146728128.484, "dur": 36.580, + "args": { + "External id": 517695,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146728226.245, "dur": 125.195, + "args": { + "External id": 517696,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146728252.532, "dur": 95.003, + "args": { + "External id": 517697,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146728263.438, "dur": 79.151, + "args": { + "External id": 517698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146728370.837, "dur": 1.845, + "args": { + "External id": 517699,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10818, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146728457.261, "dur": 1987.426, + "args": { + "External id": 517700,"Sequence number": 5516251, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10819 + } + }, + { + "ph": "f", "id": 436, "pid": 2338711, "tid": 2379440, "ts": 6274146728457.261, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146728578.532, "dur": 156.032, + "args": { + "External id": 517701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146728789.621, "dur": 45.635, + "args": { + "External id": 517702,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146728857.865, "dur": 54.267, + "args": { + "External id": 517703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146728923.047, "dur": 33.515, + "args": { + "External id": 517704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146728962.907, "dur": 33.413, + "args": { + "External id": 517705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146729002.699, "dur": 29.498, + "args": { + "External id": 517706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146729045.391, "dur": 75.396, + "args": { + "External id": 517707,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146729155.941, "dur": 25.764, + "args": { + "External id": 517708,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146729203.558, "dur": 30.943, + "args": { + "External id": 517709,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146729257.168, "dur": 19.356, + "args": { + "External id": 517710,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146729290.650, "dur": 29.747, + "args": { + "External id": 517711,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146729341.562, "dur": 53.463, + "args": { + "External id": 517712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146729398.972, "dur": 39.899, + "args": { + "External id": 517713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146729472.336, "dur": 308.536, + "args": { + "External id": 517714,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146729559.747, "dur": 6.927, + "args": { + "External id": 517715,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146729568.959, "dur": 3.322, + "args": { + "External id": 517716,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146729573.694, "dur": 2.741, + "args": { + "External id": 517717,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146729577.710, "dur": 4.793, + "args": { + "External id": 517718,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146729662.812, "dur": 6.423, + "args": { + "External id": 517719,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146729665.099, "dur": 3.568, + "args": { + "External id": 517720,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146729671.289, "dur": 37.049, + "args": { + "External id": 517721,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146729678.026, "dur": 1.916, + "args": { + "External id": 517722,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146729709.990, "dur": 1.939, + "args": { + "External id": 517723,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146729711.242, "dur": 0.576, + "args": { + "External id": 517724,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146729713.331, "dur": 17.293, + "args": { + "External id": 517725,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146729715.448, "dur": 0.685, + "args": { + "External id": 517726,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146729821.878, "dur": 31.757, + "args": { + "External id": 517727,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146729885.846, "dur": 20.427, + "args": { + "External id": 517728,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146729916.560, "dur": 56.164, + "args": { + "External id": 517729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146729980.261, "dur": 44.240, + "args": { + "External id": 517730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146730034.025, "dur": 61.110, + "args": { + "External id": 517731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146730108.404, "dur": 38.922, + "args": { + "External id": 517732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146730156.089, "dur": 29.935, + "args": { + "External id": 517733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146730193.692, "dur": 33.638, + "args": { + "External id": 517734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146730249.249, "dur": 26.475, + "args": { + "External id": 517735,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146730292.630, "dur": 26.027, + "args": { + "External id": 517736,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146730332.685, "dur": 17.125, + "args": { + "External id": 517737,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146730364.314, "dur": 14.508, + "args": { + "External id": 517738,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146730390.993, "dur": 16.712, + "args": { + "External id": 517739,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730492.642, "dur": 16.979, + "args": { + "External id": 517740,"Record function id": 0, "Ev Idx": 10859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730495.989, "dur": 12.629, + "args": { + "External id": 517741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730500.579, "dur": 7.105, + "args": { + "External id": 517742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730503.076, "dur": 4.480, + "args": { + "External id": 517743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730513.477, "dur": 5.588, + "args": { + "External id": 517744,"Record function id": 0, "Ev Idx": 10863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730514.905, "dur": 3.726, + "args": { + "External id": 517745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730515.698, "dur": 2.397, + "args": { + "External id": 517746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730516.704, "dur": 1.259, + "args": { + "External id": 517747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730522.389, "dur": 4.736, + "args": { + "External id": 517748,"Record function id": 0, "Ev Idx": 10867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730523.873, "dur": 2.839, + "args": { + "External id": 517749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730524.481, "dur": 1.793, + "args": { + "External id": 517750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730525.219, "dur": 0.992, + "args": { + "External id": 517751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730530.293, "dur": 3.486, + "args": { + "External id": 517752,"Record function id": 0, "Ev Idx": 10871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730531.164, "dur": 2.206, + "args": { + "External id": 517753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730531.799, "dur": 1.165, + "args": { + "External id": 517754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730532.132, "dur": 0.761, + "args": { + "External id": 517755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730536.839, "dur": 4.454, + "args": { + "External id": 517756,"Record function id": 0, "Ev Idx": 10875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730538.201, "dur": 2.686, + "args": { + "External id": 517757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730538.705, "dur": 1.433, + "args": { + "External id": 517758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730539.182, "dur": 0.847, + "args": { + "External id": 517759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730544.425, "dur": 6.331, + "args": { + "External id": 517760,"Record function id": 0, "Ev Idx": 10879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730545.685, "dur": 4.653, + "args": { + "External id": 517761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730546.186, "dur": 3.707, + "args": { + "External id": 517762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730546.657, "dur": 3.120, + "args": { + "External id": 517763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730553.991, "dur": 3.994, + "args": { + "External id": 517764,"Record function id": 0, "Ev Idx": 10883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730555.258, "dur": 2.308, + "args": { + "External id": 517765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730555.739, "dur": 1.394, + "args": { + "External id": 517766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730556.170, "dur": 0.879, + "args": { + "External id": 517767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730561.153, "dur": 4.360, + "args": { + "External id": 517768,"Record function id": 0, "Ev Idx": 10887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730562.265, "dur": 2.850, + "args": { + "External id": 517769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730562.946, "dur": 1.666, + "args": { + "External id": 517770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730563.566, "dur": 0.961, + "args": { + "External id": 517771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730568.527, "dur": 4.325, + "args": { + "External id": 517772,"Record function id": 0, "Ev Idx": 10891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146730569.610, "dur": 2.826, + "args": { + "External id": 517773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730570.377, "dur": 1.602, + "args": { + "External id": 517774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146730571.107, "dur": 0.746, + "args": { + "External id": 517775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146730577.085, "dur": 63455.769, + "args": { + "External id": 517776,"Record function id": 0, "Sequence number": 5516250, "Fwd thread id": 1, "Ev Idx": 10895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146730578.665, "dur": 63444.315, + "args": { + "External id": 517777,"Sequence number": 5516250, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10896 + } + }, + { + "ph": "f", "id": 437, "pid": 2338711, "tid": 2379440, "ts": 6274146730578.665, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274146730637.106, "dur": 49.069, + "args": { + "External id": 517778,"Record function id": 0, "Ev Idx": 10897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274146730696.694, "dur": 72.781, + "args": { + "External id": 517779,"Record function id": 0, "Ev Idx": 10898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338711, "tid": 2379440, + "ts": 6274146730776.251, "dur": 63238.522, + "args": { + "External id": 517780,"Record function id": 0, "Ev Idx": 10899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146730876.582, "dur": 8.796, + "args": { + "External id": 517781,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146730896.693, "dur": 5.547, + "args": { + "External id": 517782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146730919.191, "dur": 62062.392, + "args": { + "External id": 517783,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146730934.152, "dur": 62033.033, + "args": { + "External id": 517784,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146731051.277, "dur": 65.839, + "args": { + "External id": 517785,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146731141.364, "dur": 61775.743, + "args": { + "External id": 517786,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146731144.925, "dur": 61771.061, + "args": { + "External id": 517787,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146731150.564, "dur": 16.532, + "args": { + "External id": 517788,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146731169.281, "dur": 61741.279, + "args": { + "External id": 517789,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146793140.603, "dur": 14.030, + "args": { + "External id": 517790,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146793144.392, "dur": 9.677, + "args": { + "External id": 517791,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793188.627, "dur": 390.529, + "args": { + "External id": 517792,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146793227.187, "dur": 346.719, + "args": { + "External id": 517793,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10912, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146793239.963, "dur": 327.840, + "args": { + "External id": 517794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146793624.836, "dur": 4.265, + "args": { + "External id": 517795,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10914, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793707.122, "dur": 8.066, + "args": { + "External id": 517796,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793728.848, "dur": 40.813, + "args": { + "External id": 517797,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793780.680, "dur": 4.021, + "args": { + "External id": 517798,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793790.265, "dur": 12.387, + "args": { + "External id": 517799,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793809.066, "dur": 0.859, + "args": { + "External id": 517800,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793814.130, "dur": 10.674, + "args": { + "External id": 517801,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793830.435, "dur": 0.765, + "args": { + "External id": 517802,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793835.624, "dur": 10.395, + "args": { + "External id": 517803,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793850.471, "dur": 1.117, + "args": { + "External id": 517804,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793856.077, "dur": 11.302, + "args": { + "External id": 517805,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793871.453, "dur": 0.878, + "args": { + "External id": 517806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793876.520, "dur": 10.062, + "args": { + "External id": 517807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793890.955, "dur": 0.898, + "args": { + "External id": 517808,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793896.604, "dur": 11.194, + "args": { + "External id": 517809,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793911.984, "dur": 0.953, + "args": { + "External id": 517810,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793917.261, "dur": 10.525, + "args": { + "External id": 517811,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146793933.751, "dur": 1.389, + "args": { + "External id": 517812,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146793939.014, "dur": 10.476, + "args": { + "External id": 517813,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146794050.763, "dur": 3251.842, + "args": { + "External id": 517814,"Record function id": 0, "Ev Idx": 10933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274146794111.155, "dur": 1194.810, + "args": { + "External id": 517815,"Record function id": 0, "Ev Idx": 10934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274146794129.043, "dur": 343.643, + "args": { + "External id": 517816,"Record function id": 0, "Ev Idx": 10935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794221.877, "dur": 7.395, + "args": { + "External id": 517817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794232.940, "dur": 0.916, + "args": { + "External id": 517818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794235.937, "dur": 1.087, + "args": { + "External id": 517819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794238.819, "dur": 1.029, + "args": { + "External id": 517820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794241.923, "dur": 0.767, + "args": { + "External id": 517821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794244.150, "dur": 0.830, + "args": { + "External id": 517822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794246.546, "dur": 0.899, + "args": { + "External id": 517823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794251.190, "dur": 2.217, + "args": { + "External id": 517824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794254.989, "dur": 2.429, + "args": { + "External id": 517825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146794258.508, "dur": 0.774, + "args": { + "External id": 517826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146794278.336, "dur": 161.581, + "args": { + "External id": 517827,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146794296.122, "dur": 139.425, + "args": { + "External id": 517828,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146794314.737, "dur": 16.217, + "args": { + "External id": 517829,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146794335.346, "dur": 70.758, + "args": { + "External id": 517830,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146794338.757, "dur": 67.058, + "args": { + "External id": 517831,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794343.860, "dur": 5.397, + "args": { + "External id": 517832,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146794351.263, "dur": 53.859, + "args": { + "External id": 517833,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10952 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338711, "tid": 2379440, + "ts": 6274146794567.546, "dur": 730.311, + "args": { + "External id": 517834,"Record function id": 0, "Ev Idx": 10953 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274146794585.817, "dur": 698.102, + "args": { + "External id": 517835,"Record function id": 0, "Ev Idx": 10954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146794686.959, "dur": 8.492, + "args": { + "External id": 517836,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146794716.463, "dur": 38.840, + "args": { + "External id": 517837,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794723.291, "dur": 2.077, + "args": { + "External id": 517838,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794729.058, "dur": 0.710, + "args": { + "External id": 517839,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794732.533, "dur": 1.187, + "args": { + "External id": 517840,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794736.232, "dur": 2.887, + "args": { + "External id": 517841,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794740.389, "dur": 0.650, + "args": { + "External id": 517842,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794743.281, "dur": 0.503, + "args": { + "External id": 517843,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794745.838, "dur": 0.366, + "args": { + "External id": 517844,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794747.458, "dur": 0.627, + "args": { + "External id": 517845,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794749.909, "dur": 0.651, + "args": { + "External id": 517846,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146794767.298, "dur": 50.082, + "args": { + "External id": 517847,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146794855.227, "dur": 124.975, + "args": { + "External id": 517848,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146794866.620, "dur": 3.232, + "args": { + "External id": 517849,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146794875.341, "dur": 10.868, + "args": { + "External id": 517850,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146794880.312, "dur": 5.475, + "args": { + "External id": 517851,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794884.141, "dur": 0.393, + "args": { + "External id": 517852,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146794897.254, "dur": 30.747, + "args": { + "External id": 517853,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794899.163, "dur": 2.964, + "args": { + "External id": 517854,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794903.601, "dur": 0.424, + "args": { + "External id": 517855,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794905.923, "dur": 0.580, + "args": { + "External id": 517856,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794908.143, "dur": 0.304, + "args": { + "External id": 517857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794909.710, "dur": 0.581, + "args": { + "External id": 517858,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794912.350, "dur": 0.331, + "args": { + "External id": 517859,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794918.084, "dur": 0.383, + "args": { + "External id": 517860,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794920.047, "dur": 0.484, + "args": { + "External id": 517861,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146794922.332, "dur": 2.841, + "args": { + "External id": 517862,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146794937.530, "dur": 34.668, + "args": { + "External id": 517863,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146795030.047, "dur": 168.900, + "args": { + "External id": 517864,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146795096.582, "dur": 97.805, + "args": { + "External id": 517865,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10984, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146795109.441, "dur": 80.013, + "args": { + "External id": 517866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146795218.905, "dur": 1.945, + "args": { + "External id": 517867,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10986, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146795313.842, "dur": 1966.965, + "args": { + "External id": 517868,"Sequence number": 5516249, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10987 + } + }, + { + "ph": "f", "id": 438, "pid": 2338711, "tid": 2379440, "ts": 6274146795313.842, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146795433.237, "dur": 113.923, + "args": { + "External id": 517869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146795611.197, "dur": 51.642, + "args": { + "External id": 517870,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146795691.298, "dur": 73.189, + "args": { + "External id": 517871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146795781.828, "dur": 37.049, + "args": { + "External id": 517872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146795826.185, "dur": 34.396, + "args": { + "External id": 517873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146795867.372, "dur": 28.200, + "args": { + "External id": 517874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146795904.766, "dur": 30.210, + "args": { + "External id": 517875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146795968.249, "dur": 25.576, + "args": { + "External id": 517876,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146796014.703, "dur": 29.123, + "args": { + "External id": 517877,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146796110.280, "dur": 22.971, + "args": { + "External id": 517878,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146796148.523, "dur": 16.508, + "args": { + "External id": 517879,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796181.529, "dur": 43.322, + "args": { + "External id": 517880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796229.308, "dur": 36.760, + "args": { + "External id": 517881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146796298.845, "dur": 272.302, + "args": { + "External id": 517882,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146796387.219, "dur": 7.682, + "args": { + "External id": 517883,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146796397.153, "dur": 2.985, + "args": { + "External id": 517884,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146796401.739, "dur": 2.192, + "args": { + "External id": 517885,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146796405.348, "dur": 4.707, + "args": { + "External id": 517886,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146796461.573, "dur": 5.587, + "args": { + "External id": 517887,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146796463.893, "dur": 3.084, + "args": { + "External id": 517888,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146796469.302, "dur": 35.818, + "args": { + "External id": 517889,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146796475.483, "dur": 1.890, + "args": { + "External id": 517890,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146796506.839, "dur": 2.160, + "args": { + "External id": 517891,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146796508.183, "dur": 0.730, + "args": { + "External id": 517892,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146796510.217, "dur": 17.560, + "args": { + "External id": 517893,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146796512.284, "dur": 0.800, + "args": { + "External id": 517894,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146796634.420, "dur": 39.147, + "args": { + "External id": 517895,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146796695.636, "dur": 22.009, + "args": { + "External id": 517896,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796727.503, "dur": 69.344, + "args": { + "External id": 517897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796811.718, "dur": 46.892, + "args": { + "External id": 517898,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796873.354, "dur": 23.392, + "args": { + "External id": 517899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796906.001, "dur": 36.796, + "args": { + "External id": 517900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796950.169, "dur": 31.578, + "args": { + "External id": 517901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146796988.793, "dur": 32.079, + "args": { + "External id": 517902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146797042.063, "dur": 65.138, + "args": { + "External id": 517903,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146797126.983, "dur": 25.908, + "args": { + "External id": 517904,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146797167.026, "dur": 17.596, + "args": { + "External id": 517905,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146797200.018, "dur": 14.620, + "args": { + "External id": 517906,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146797227.296, "dur": 17.248, + "args": { + "External id": 517907,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797328.641, "dur": 17.538, + "args": { + "External id": 517908,"Record function id": 0, "Ev Idx": 11027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797332.854, "dur": 12.395, + "args": { + "External id": 517909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797337.634, "dur": 6.537, + "args": { + "External id": 517910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797339.542, "dur": 4.497, + "args": { + "External id": 517911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797350.106, "dur": 5.610, + "args": { + "External id": 517912,"Record function id": 0, "Ev Idx": 11031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797351.619, "dur": 3.661, + "args": { + "External id": 517913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797352.258, "dur": 2.522, + "args": { + "External id": 517914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797353.257, "dur": 1.361, + "args": { + "External id": 517915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797358.986, "dur": 4.354, + "args": { + "External id": 517916,"Record function id": 0, "Ev Idx": 11035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797360.000, "dur": 2.945, + "args": { + "External id": 517917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797360.778, "dur": 1.696, + "args": { + "External id": 517918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797361.323, "dur": 1.065, + "args": { + "External id": 517919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797366.510, "dur": 3.422, + "args": { + "External id": 517920,"Record function id": 0, "Ev Idx": 11039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797367.434, "dur": 2.065, + "args": { + "External id": 517921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797367.920, "dur": 1.160, + "args": { + "External id": 517922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797368.355, "dur": 0.645, + "args": { + "External id": 517923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797372.994, "dur": 4.040, + "args": { + "External id": 517924,"Record function id": 0, "Ev Idx": 11043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797374.267, "dur": 2.320, + "args": { + "External id": 517925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797374.971, "dur": 1.106, + "args": { + "External id": 517926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797375.458, "dur": 0.542, + "args": { + "External id": 517927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797380.001, "dur": 4.583, + "args": { + "External id": 517928,"Record function id": 0, "Ev Idx": 11047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797381.056, "dur": 3.100, + "args": { + "External id": 517929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797381.749, "dur": 1.936, + "args": { + "External id": 517930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797382.459, "dur": 1.121, + "args": { + "External id": 517931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797387.680, "dur": 6.478, + "args": { + "External id": 517932,"Record function id": 0, "Ev Idx": 11051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797388.967, "dur": 4.747, + "args": { + "External id": 517933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797389.635, "dur": 3.646, + "args": { + "External id": 517934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797389.901, "dur": 3.296, + "args": { + "External id": 517935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797397.132, "dur": 4.891, + "args": { + "External id": 517936,"Record function id": 0, "Ev Idx": 11055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797398.602, "dur": 3.017, + "args": { + "External id": 517937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797399.637, "dur": 1.515, + "args": { + "External id": 517938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797400.232, "dur": 0.825, + "args": { + "External id": 517939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797405.330, "dur": 4.446, + "args": { + "External id": 517940,"Record function id": 0, "Ev Idx": 11059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146797406.689, "dur": 2.632, + "args": { + "External id": 517941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797407.247, "dur": 1.639, + "args": { + "External id": 517942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146797408.214, "dur": 0.548, + "args": { + "External id": 517943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146797414.184, "dur": 65796.224, + "args": { + "External id": 517944,"Record function id": 0, "Sequence number": 5516248, "Fwd thread id": 1, "Ev Idx": 11063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146797415.676, "dur": 65784.585, + "args": { + "External id": 517945,"Sequence number": 5516248, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11064 + } + }, + { + "ph": "f", "id": 439, "pid": 2338711, "tid": 2379440, "ts": 6274146797415.676, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274146797449.713, "dur": 42.042, + "args": { + "External id": 517946,"Record function id": 0, "Ev Idx": 11065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274146797499.388, "dur": 66.860, + "args": { + "External id": 517947,"Record function id": 0, "Ev Idx": 11066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338711, "tid": 2379440, + "ts": 6274146797573.388, "dur": 65618.749, + "args": { + "External id": 517948,"Record function id": 0, "Ev Idx": 11067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146797701.839, "dur": 8.194, + "args": { + "External id": 517949,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146797721.598, "dur": 6.000, + "args": { + "External id": 517950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146797744.027, "dur": 64354.957, + "args": { + "External id": 517951,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146797758.994, "dur": 64325.778, + "args": { + "External id": 517952,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146797871.581, "dur": 19.440, + "args": { + "External id": 517953,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146797912.134, "dur": 64093.648, + "args": { + "External id": 517954,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146797917.641, "dur": 64086.689, + "args": { + "External id": 517955,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146797922.827, "dur": 9.270, + "args": { + "External id": 517956,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146797934.184, "dur": 64063.589, + "args": { + "External id": 517957,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146862222.975, "dur": 13.923, + "args": { + "External id": 517958,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146862226.867, "dur": 9.693, + "args": { + "External id": 517959,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146862269.133, "dur": 458.180, + "args": { + "External id": 517960,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146862302.852, "dur": 417.294, + "args": { + "External id": 517961,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11080, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146862316.801, "dur": 396.801, + "args": { + "External id": 517962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146862755.472, "dur": 3.030, + "args": { + "External id": 517963,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11082, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146862830.538, "dur": 7.547, + "args": { + "External id": 517964,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146862851.272, "dur": 36.001, + "args": { + "External id": 517965,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146862898.153, "dur": 1.620, + "args": { + "External id": 517966,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146862904.921, "dur": 11.611, + "args": { + "External id": 517967,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146862922.585, "dur": 3.653, + "args": { + "External id": 517968,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146862930.601, "dur": 12.821, + "args": { + "External id": 517969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146862948.536, "dur": 1.206, + "args": { + "External id": 517970,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146862953.917, "dur": 12.320, + "args": { + "External id": 517971,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146862970.317, "dur": 0.897, + "args": { + "External id": 517972,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146862975.887, "dur": 12.241, + "args": { + "External id": 517973,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146862992.123, "dur": 1.310, + "args": { + "External id": 517974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146862997.682, "dur": 12.178, + "args": { + "External id": 517975,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863013.747, "dur": 1.084, + "args": { + "External id": 517976,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146863018.945, "dur": 12.564, + "args": { + "External id": 517977,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863035.945, "dur": 1.076, + "args": { + "External id": 517978,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146863041.087, "dur": 44.711, + "args": { + "External id": 517979,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863094.637, "dur": 1.873, + "args": { + "External id": 517980,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146863101.171, "dur": 19.645, + "args": { + "External id": 517981,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146863228.021, "dur": 3142.999, + "args": { + "External id": 517982,"Record function id": 0, "Ev Idx": 11101 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274146863254.652, "dur": 1154.539, + "args": { + "External id": 517983,"Record function id": 0, "Ev Idx": 11102 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274146863271.910, "dur": 376.024, + "args": { + "External id": 517984,"Record function id": 0, "Ev Idx": 11103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863366.074, "dur": 4.151, + "args": { + "External id": 517985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863373.945, "dur": 3.390, + "args": { + "External id": 517986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863379.471, "dur": 1.084, + "args": { + "External id": 517987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863382.121, "dur": 1.035, + "args": { + "External id": 517988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863387.060, "dur": 0.850, + "args": { + "External id": 517989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863389.578, "dur": 1.015, + "args": { + "External id": 517990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863392.402, "dur": 0.728, + "args": { + "External id": 517991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863394.688, "dur": 2.294, + "args": { + "External id": 517992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863400.921, "dur": 0.564, + "args": { + "External id": 517993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146863402.774, "dur": 5.968, + "args": { + "External id": 517994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146863427.617, "dur": 157.200, + "args": { + "External id": 517995,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146863445.189, "dur": 135.043, + "args": { + "External id": 517996,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146863462.963, "dur": 14.368, + "args": { + "External id": 517997,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146863481.522, "dur": 65.610, + "args": { + "External id": 517998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146863484.574, "dur": 62.173, + "args": { + "External id": 517999,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863489.028, "dur": 5.505, + "args": { + "External id": 518000,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146863496.726, "dur": 49.330, + "args": { + "External id": 518001,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338711, "tid": 2379440, + "ts": 6274146863759.933, "dur": 641.438, + "args": { + "External id": 518002,"Record function id": 0, "Ev Idx": 11121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274146863780.346, "dur": 608.498, + "args": { + "External id": 518003,"Record function id": 0, "Ev Idx": 11122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146863844.057, "dur": 7.366, + "args": { + "External id": 518004,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146863868.543, "dur": 32.745, + "args": { + "External id": 518005,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863874.495, "dur": 1.963, + "args": { + "External id": 518006,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863878.734, "dur": 0.726, + "args": { + "External id": 518007,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863881.420, "dur": 0.667, + "args": { + "External id": 518008,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863883.702, "dur": 0.375, + "args": { + "External id": 518009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863885.124, "dur": 3.224, + "args": { + "External id": 518010,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863890.631, "dur": 0.369, + "args": { + "External id": 518011,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863892.279, "dur": 0.449, + "args": { + "External id": 518012,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863894.163, "dur": 0.395, + "args": { + "External id": 518013,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146863896.341, "dur": 0.752, + "args": { + "External id": 518014,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146863912.468, "dur": 48.169, + "args": { + "External id": 518015,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146863993.028, "dur": 155.162, + "args": { + "External id": 518016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146864004.068, "dur": 2.862, + "args": { + "External id": 518017,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146864012.151, "dur": 11.110, + "args": { + "External id": 518018,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146864017.219, "dur": 5.611, + "args": { + "External id": 518019,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864020.905, "dur": 0.709, + "args": { + "External id": 518020,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146864030.261, "dur": 61.226, + "args": { + "External id": 518021,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864032.115, "dur": 0.529, + "args": { + "External id": 518022,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864034.363, "dur": 2.875, + "args": { + "External id": 518023,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864039.116, "dur": 0.468, + "args": { + "External id": 518024,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864040.922, "dur": 0.329, + "args": { + "External id": 518025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864042.561, "dur": 0.477, + "args": { + "External id": 518026,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864044.737, "dur": 0.386, + "args": { + "External id": 518027,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864046.410, "dur": 0.665, + "args": { + "External id": 518028,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864048.049, "dur": 0.491, + "args": { + "External id": 518029,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146864050.426, "dur": 0.500, + "args": { + "External id": 518030,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146864105.304, "dur": 32.698, + "args": { + "External id": 518031,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146864195.817, "dur": 119.392, + "args": { + "External id": 518032,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146864220.827, "dur": 90.898, + "args": { + "External id": 518033,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11152, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146864233.049, "dur": 74.281, + "args": { + "External id": 518034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146864333.323, "dur": 2.064, + "args": { + "External id": 518035,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11154, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146864417.072, "dur": 1932.091, + "args": { + "External id": 518036,"Sequence number": 5516247, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11155 + } + }, + { + "ph": "f", "id": 440, "pid": 2338711, "tid": 2379440, "ts": 6274146864417.072, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146864529.144, "dur": 148.214, + "args": { + "External id": 518037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146864732.692, "dur": 41.458, + "args": { + "External id": 518038,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146864796.954, "dur": 63.274, + "args": { + "External id": 518039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146864870.830, "dur": 36.607, + "args": { + "External id": 518040,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146864914.594, "dur": 34.492, + "args": { + "External id": 518041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146864956.155, "dur": 29.539, + "args": { + "External id": 518042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146864994.596, "dur": 30.022, + "args": { + "External id": 518043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146865089.530, "dur": 28.550, + "args": { + "External id": 518044,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146865141.288, "dur": 29.903, + "args": { + "External id": 518045,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146865195.363, "dur": 19.988, + "args": { + "External id": 518046,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146865229.514, "dur": 16.351, + "args": { + "External id": 518047,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146865257.137, "dur": 42.700, + "args": { + "External id": 518048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146865304.320, "dur": 34.370, + "args": { + "External id": 518049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146865370.774, "dur": 303.022, + "args": { + "External id": 518050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146865458.617, "dur": 6.496, + "args": { + "External id": 518051,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146865470.746, "dur": 2.628, + "args": { + "External id": 518052,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146865475.079, "dur": 2.219, + "args": { + "External id": 518053,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146865478.692, "dur": 4.422, + "args": { + "External id": 518054,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146865529.862, "dur": 5.456, + "args": { + "External id": 518055,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146865532.191, "dur": 2.899, + "args": { + "External id": 518056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146865537.719, "dur": 34.588, + "args": { + "External id": 518057,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146865543.462, "dur": 1.867, + "args": { + "External id": 518058,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146865574.217, "dur": 2.139, + "args": { + "External id": 518059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146865575.487, "dur": 0.797, + "args": { + "External id": 518060,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146865578.022, "dur": 38.685, + "args": { + "External id": 518061,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146865579.900, "dur": 0.540, + "args": { + "External id": 518062,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146865714.211, "dur": 44.535, + "args": { + "External id": 518063,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146865785.567, "dur": 23.977, + "args": { + "External id": 518064,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146865820.387, "dur": 59.297, + "args": { + "External id": 518065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146865886.842, "dur": 46.761, + "args": { + "External id": 518066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146865942.757, "dur": 23.731, + "args": { + "External id": 518067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146865974.415, "dur": 34.204, + "args": { + "External id": 518068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146866015.982, "dur": 30.127, + "args": { + "External id": 518069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146866089.372, "dur": 40.827, + "args": { + "External id": 518070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146866150.297, "dur": 27.338, + "args": { + "External id": 518071,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146866194.601, "dur": 26.584, + "args": { + "External id": 518072,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146866235.827, "dur": 19.723, + "args": { + "External id": 518073,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146866269.784, "dur": 15.350, + "args": { + "External id": 518074,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146866297.328, "dur": 18.203, + "args": { + "External id": 518075,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866395.499, "dur": 16.369, + "args": { + "External id": 518076,"Record function id": 0, "Ev Idx": 11195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866398.684, "dur": 12.216, + "args": { + "External id": 518077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866403.086, "dur": 6.722, + "args": { + "External id": 518078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866405.342, "dur": 4.293, + "args": { + "External id": 518079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866415.730, "dur": 5.390, + "args": { + "External id": 518080,"Record function id": 0, "Ev Idx": 11199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866416.848, "dur": 3.759, + "args": { + "External id": 518081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866417.773, "dur": 2.342, + "args": { + "External id": 518082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866418.775, "dur": 1.200, + "args": { + "External id": 518083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866424.313, "dur": 4.362, + "args": { + "External id": 518084,"Record function id": 0, "Ev Idx": 11203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866425.532, "dur": 2.690, + "args": { + "External id": 518085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866426.255, "dur": 1.543, + "args": { + "External id": 518086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866426.781, "dur": 0.931, + "args": { + "External id": 518087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866431.857, "dur": 3.780, + "args": { + "External id": 518088,"Record function id": 0, "Ev Idx": 11207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866433.068, "dur": 2.143, + "args": { + "External id": 518089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866433.595, "dur": 1.217, + "args": { + "External id": 518090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866434.005, "dur": 0.734, + "args": { + "External id": 518091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866438.734, "dur": 3.853, + "args": { + "External id": 518092,"Record function id": 0, "Ev Idx": 11211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866439.808, "dur": 2.395, + "args": { + "External id": 518093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866440.654, "dur": 1.097, + "args": { + "External id": 518094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866440.998, "dur": 0.672, + "args": { + "External id": 518095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866445.613, "dur": 6.312, + "args": { + "External id": 518096,"Record function id": 0, "Ev Idx": 11215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866446.661, "dur": 4.772, + "args": { + "External id": 518097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866447.326, "dur": 3.656, + "args": { + "External id": 518098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866447.786, "dur": 3.080, + "args": { + "External id": 518099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866455.129, "dur": 3.964, + "args": { + "External id": 518100,"Record function id": 0, "Ev Idx": 11219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866456.339, "dur": 2.315, + "args": { + "External id": 518101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866456.870, "dur": 1.377, + "args": { + "External id": 518102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866457.270, "dur": 0.891, + "args": { + "External id": 518103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866462.080, "dur": 3.840, + "args": { + "External id": 518104,"Record function id": 0, "Ev Idx": 11223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866463.173, "dur": 2.302, + "args": { + "External id": 518105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866463.742, "dur": 1.263, + "args": { + "External id": 518106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866464.217, "dur": 0.689, + "args": { + "External id": 518107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866469.246, "dur": 4.349, + "args": { + "External id": 518108,"Record function id": 0, "Ev Idx": 11227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146866470.328, "dur": 2.859, + "args": { + "External id": 518109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866471.228, "dur": 1.385, + "args": { + "External id": 518110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146866471.868, "dur": 0.613, + "args": { + "External id": 518111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146866477.929, "dur": 65776.372, + "args": { + "External id": 518112,"Record function id": 0, "Sequence number": 5516246, "Fwd thread id": 1, "Ev Idx": 11231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146866479.166, "dur": 65764.958, + "args": { + "External id": 518113,"Sequence number": 5516246, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11232 + } + }, + { + "ph": "f", "id": 441, "pid": 2338711, "tid": 2379440, "ts": 6274146866479.166, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274146866512.948, "dur": 41.320, + "args": { + "External id": 518114,"Record function id": 0, "Ev Idx": 11233 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274146866562.582, "dur": 100.380, + "args": { + "External id": 518115,"Record function id": 0, "Ev Idx": 11234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338711, "tid": 2379440, + "ts": 6274146866672.517, "dur": 65563.198, + "args": { + "External id": 518116,"Record function id": 0, "Ev Idx": 11235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146866772.770, "dur": 8.385, + "args": { + "External id": 518117,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146866792.170, "dur": 5.476, + "args": { + "External id": 518118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146866813.736, "dur": 64327.590, + "args": { + "External id": 518119,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146866828.612, "dur": 64298.880, + "args": { + "External id": 518120,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146866929.480, "dur": 20.011, + "args": { + "External id": 518121,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146866969.483, "dur": 64082.664, + "args": { + "External id": 518122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146866972.323, "dur": 64078.369, + "args": { + "External id": 518123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146866977.438, "dur": 9.490, + "args": { + "External id": 518124,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146866988.893, "dur": 64055.543, + "args": { + "External id": 518125,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146931260.668, "dur": 13.211, + "args": { + "External id": 518126,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146931264.355, "dur": 9.146, + "args": { + "External id": 518127,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146931306.571, "dur": 423.621, + "args": { + "External id": 518128,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146931338.828, "dur": 385.664, + "args": { + "External id": 518129,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11248, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146931350.799, "dur": 366.778, + "args": { + "External id": 518130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146931756.282, "dur": 2.413, + "args": { + "External id": 518131,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11250, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146931827.165, "dur": 7.494, + "args": { + "External id": 518132,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146931847.850, "dur": 37.239, + "args": { + "External id": 518133,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146931896.067, "dur": 1.687, + "args": { + "External id": 518134,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146931903.459, "dur": 13.758, + "args": { + "External id": 518135,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146931923.363, "dur": 3.276, + "args": { + "External id": 518136,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146931931.139, "dur": 13.535, + "args": { + "External id": 518137,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146931957.345, "dur": 1.051, + "args": { + "External id": 518138,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146931963.072, "dur": 11.506, + "args": { + "External id": 518139,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146931979.359, "dur": 0.957, + "args": { + "External id": 518140,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146931984.191, "dur": 10.719, + "args": { + "External id": 518141,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146931999.093, "dur": 1.211, + "args": { + "External id": 518142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146932004.108, "dur": 9.952, + "args": { + "External id": 518143,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932020.612, "dur": 0.815, + "args": { + "External id": 518144,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146932051.553, "dur": 49.025, + "args": { + "External id": 518145,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932131.483, "dur": 1.485, + "args": { + "External id": 518146,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146932137.587, "dur": 10.807, + "args": { + "External id": 518147,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932152.597, "dur": 0.737, + "args": { + "External id": 518148,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146932157.406, "dur": 10.840, + "args": { + "External id": 518149,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146932276.329, "dur": 3097.937, + "args": { + "External id": 518150,"Record function id": 0, "Ev Idx": 11269 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274146932301.306, "dur": 1142.869, + "args": { + "External id": 518151,"Record function id": 0, "Ev Idx": 11270 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274146932318.208, "dur": 383.767, + "args": { + "External id": 518152,"Record function id": 0, "Ev Idx": 11271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932412.310, "dur": 4.431, + "args": { + "External id": 518153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932420.517, "dur": 3.498, + "args": { + "External id": 518154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932426.318, "dur": 1.017, + "args": { + "External id": 518155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932429.260, "dur": 0.868, + "args": { + "External id": 518156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932431.989, "dur": 0.888, + "args": { + "External id": 518157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932434.298, "dur": 0.998, + "args": { + "External id": 518158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932436.784, "dur": 0.726, + "args": { + "External id": 518159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932441.387, "dur": 2.149, + "args": { + "External id": 518160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932444.709, "dur": 0.800, + "args": { + "External id": 518161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146932446.742, "dur": 2.995, + "args": { + "External id": 518162,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146932468.112, "dur": 191.891, + "args": { + "External id": 518163,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146932488.775, "dur": 164.816, + "args": { + "External id": 518164,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146932505.738, "dur": 14.202, + "args": { + "External id": 518165,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146932523.709, "dur": 95.196, + "args": { + "External id": 518166,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146932528.884, "dur": 89.476, + "args": { + "External id": 518167,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932532.802, "dur": 9.395, + "args": { + "External id": 518168,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146932543.860, "dur": 73.106, + "args": { + "External id": 518169,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11288 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338711, "tid": 2379440, + "ts": 6274146932795.322, "dur": 640.679, + "args": { + "External id": 518170,"Record function id": 0, "Ev Idx": 11289 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274146932812.950, "dur": 609.695, + "args": { + "External id": 518171,"Record function id": 0, "Ev Idx": 11290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146932877.214, "dur": 6.848, + "args": { + "External id": 518172,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146932900.587, "dur": 31.100, + "args": { + "External id": 518173,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932906.840, "dur": 1.807, + "args": { + "External id": 518174,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932910.616, "dur": 0.467, + "args": { + "External id": 518175,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932912.582, "dur": 0.400, + "args": { + "External id": 518176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932914.550, "dur": 0.350, + "args": { + "External id": 518177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932916.227, "dur": 2.857, + "args": { + "External id": 518178,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932920.717, "dur": 0.593, + "args": { + "External id": 518179,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932922.750, "dur": 0.361, + "args": { + "External id": 518180,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932924.409, "dur": 0.534, + "args": { + "External id": 518181,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146932926.825, "dur": 0.742, + "args": { + "External id": 518182,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146932942.602, "dur": 43.801, + "args": { + "External id": 518183,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146933017.479, "dur": 155.355, + "args": { + "External id": 518184,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146933028.746, "dur": 2.970, + "args": { + "External id": 518185,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146933036.921, "dur": 10.363, + "args": { + "External id": 518186,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146933041.275, "dur": 5.606, + "args": { + "External id": 518187,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933045.014, "dur": 0.659, + "args": { + "External id": 518188,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146933092.792, "dur": 25.259, + "args": { + "External id": 518189,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933095.716, "dur": 0.670, + "args": { + "External id": 518190,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933098.206, "dur": 2.810, + "args": { + "External id": 518191,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933102.667, "dur": 0.524, + "args": { + "External id": 518192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933105.101, "dur": 0.293, + "args": { + "External id": 518193,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933106.256, "dur": 0.566, + "args": { + "External id": 518194,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933108.642, "dur": 0.564, + "args": { + "External id": 518195,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933110.525, "dur": 0.682, + "args": { + "External id": 518196,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933112.246, "dur": 0.331, + "args": { + "External id": 518197,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146933114.174, "dur": 0.820, + "args": { + "External id": 518198,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146933129.421, "dur": 33.488, + "args": { + "External id": 518199,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146933228.563, "dur": 120.744, + "args": { + "External id": 518200,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146933255.472, "dur": 90.318, + "args": { + "External id": 518201,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11320, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146933265.811, "dur": 75.658, + "args": { + "External id": 518202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146933366.525, "dur": 2.006, + "args": { + "External id": 518203,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11322, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146933452.636, "dur": 1899.402, + "args": { + "External id": 518204,"Sequence number": 5516245, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11323 + } + }, + { + "ph": "f", "id": 442, "pid": 2338711, "tid": 2379440, "ts": 6274146933452.636, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146933567.889, "dur": 146.143, + "args": { + "External id": 518205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146933766.092, "dur": 44.375, + "args": { + "External id": 518206,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274146933833.566, "dur": 56.595, + "args": { + "External id": 518207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146933900.856, "dur": 34.747, + "args": { + "External id": 518208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146933941.886, "dur": 33.660, + "args": { + "External id": 518209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146933984.674, "dur": 28.963, + "args": { + "External id": 518210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146934022.742, "dur": 74.813, + "args": { + "External id": 518211,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146934129.698, "dur": 28.654, + "args": { + "External id": 518212,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274146934179.165, "dur": 30.696, + "args": { + "External id": 518213,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146934230.492, "dur": 21.520, + "args": { + "External id": 518214,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146934265.026, "dur": 16.319, + "args": { + "External id": 518215,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146934292.563, "dur": 44.101, + "args": { + "External id": 518216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146934340.457, "dur": 35.925, + "args": { + "External id": 518217,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274146934406.437, "dur": 297.827, + "args": { + "External id": 518218,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146934488.255, "dur": 6.905, + "args": { + "External id": 518219,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146934497.249, "dur": 2.774, + "args": { + "External id": 518220,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146934502.044, "dur": 2.142, + "args": { + "External id": 518221,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146934505.591, "dur": 3.982, + "args": { + "External id": 518222,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146934558.447, "dur": 5.537, + "args": { + "External id": 518223,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146934560.554, "dur": 3.209, + "args": { + "External id": 518224,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146934565.796, "dur": 56.052, + "args": { + "External id": 518225,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146934571.647, "dur": 1.829, + "args": { + "External id": 518226,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274146934626.015, "dur": 3.089, + "args": { + "External id": 518227,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146934627.807, "dur": 1.073, + "args": { + "External id": 518228,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274146934630.899, "dur": 23.206, + "args": { + "External id": 518229,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146934634.421, "dur": 0.881, + "args": { + "External id": 518230,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274146934742.528, "dur": 28.956, + "args": { + "External id": 518231,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146934788.477, "dur": 17.750, + "args": { + "External id": 518232,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146934815.096, "dur": 50.620, + "args": { + "External id": 518233,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146934886.684, "dur": 51.992, + "args": { + "External id": 518234,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146934950.597, "dur": 22.040, + "args": { + "External id": 518235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146934980.651, "dur": 35.975, + "args": { + "External id": 518236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146935024.169, "dur": 65.968, + "args": { + "External id": 518237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274146935102.136, "dur": 36.107, + "args": { + "External id": 518238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274146935159.919, "dur": 26.078, + "args": { + "External id": 518239,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146935202.703, "dur": 24.377, + "args": { + "External id": 518240,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274146935240.515, "dur": 18.411, + "args": { + "External id": 518241,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274146935273.413, "dur": 13.941, + "args": { + "External id": 518242,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274146935299.927, "dur": 18.658, + "args": { + "External id": 518243,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935398.701, "dur": 15.922, + "args": { + "External id": 518244,"Record function id": 0, "Ev Idx": 11363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935401.757, "dur": 11.812, + "args": { + "External id": 518245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935406.021, "dur": 6.408, + "args": { + "External id": 518246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935408.096, "dur": 4.159, + "args": { + "External id": 518247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935418.590, "dur": 4.297, + "args": { + "External id": 518248,"Record function id": 0, "Ev Idx": 11367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935419.620, "dur": 2.805, + "args": { + "External id": 518249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935420.433, "dur": 1.501, + "args": { + "External id": 518250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935421.100, "dur": 0.695, + "args": { + "External id": 518251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935426.070, "dur": 3.967, + "args": { + "External id": 518252,"Record function id": 0, "Ev Idx": 11371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935427.171, "dur": 2.453, + "args": { + "External id": 518253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935427.792, "dur": 1.426, + "args": { + "External id": 518254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935428.347, "dur": 0.809, + "args": { + "External id": 518255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935433.159, "dur": 3.858, + "args": { + "External id": 518256,"Record function id": 0, "Ev Idx": 11375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935434.194, "dur": 2.416, + "args": { + "External id": 518257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935434.905, "dur": 1.281, + "args": { + "External id": 518258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935435.410, "dur": 0.703, + "args": { + "External id": 518259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935450.794, "dur": 4.156, + "args": { + "External id": 518260,"Record function id": 0, "Ev Idx": 11379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935451.960, "dur": 2.578, + "args": { + "External id": 518261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935452.799, "dur": 1.185, + "args": { + "External id": 518262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935453.307, "dur": 0.589, + "args": { + "External id": 518263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935458.015, "dur": 7.874, + "args": { + "External id": 518264,"Record function id": 0, "Ev Idx": 11383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935459.266, "dur": 6.207, + "args": { + "External id": 518265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935459.932, "dur": 5.074, + "args": { + "External id": 518266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935461.624, "dur": 3.278, + "args": { + "External id": 518267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935469.076, "dur": 4.332, + "args": { + "External id": 518268,"Record function id": 0, "Ev Idx": 11387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935470.428, "dur": 2.574, + "args": { + "External id": 518269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935470.909, "dur": 1.653, + "args": { + "External id": 518270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935471.601, "dur": 0.857, + "args": { + "External id": 518271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935476.871, "dur": 4.074, + "args": { + "External id": 518272,"Record function id": 0, "Ev Idx": 11391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935478.183, "dur": 2.339, + "args": { + "External id": 518273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935478.825, "dur": 1.158, + "args": { + "External id": 518274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935479.125, "dur": 0.750, + "args": { + "External id": 518275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935484.531, "dur": 3.742, + "args": { + "External id": 518276,"Record function id": 0, "Ev Idx": 11395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274146935485.555, "dur": 2.314, + "args": { + "External id": 518277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935486.053, "dur": 1.372, + "args": { + "External id": 518278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274146935486.693, "dur": 0.655, + "args": { + "External id": 518279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146935492.548, "dur": 63226.881, + "args": { + "External id": 518280,"Record function id": 0, "Sequence number": 5516244, "Fwd thread id": 1, "Ev Idx": 11399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146935494.115, "dur": 63214.575, + "args": { + "External id": 518281,"Sequence number": 5516244, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11400 + } + }, + { + "ph": "f", "id": 443, "pid": 2338711, "tid": 2379440, "ts": 6274146935494.115, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274146935527.025, "dur": 43.212, + "args": { + "External id": 518282,"Record function id": 0, "Ev Idx": 11401 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274146935578.354, "dur": 108.588, + "args": { + "External id": 518283,"Record function id": 0, "Ev Idx": 11402 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338711, "tid": 2379440, + "ts": 6274146935697.547, "dur": 63002.436, + "args": { + "External id": 518284,"Record function id": 0, "Ev Idx": 11403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146935801.196, "dur": 9.050, + "args": { + "External id": 518285,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146935821.573, "dur": 5.630, + "args": { + "External id": 518286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146935843.337, "dur": 61834.622, + "args": { + "External id": 518287,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274146935859.710, "dur": 61804.342, + "args": { + "External id": 518288,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146935954.221, "dur": 18.210, + "args": { + "External id": 518289,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146935992.805, "dur": 61613.673, + "args": { + "External id": 518290,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146935995.633, "dur": 61609.413, + "args": { + "External id": 518291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146936000.532, "dur": 9.396, + "args": { + "External id": 518292,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146936012.098, "dur": 61573.319, + "args": { + "External id": 518293,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146997796.327, "dur": 13.451, + "args": { + "External id": 518294,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146997800.048, "dur": 9.301, + "args": { + "External id": 518295,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146997843.526, "dur": 422.355, + "args": { + "External id": 518296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146997875.028, "dur": 384.953, + "args": { + "External id": 518297,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11416, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146997887.205, "dur": 362.537, + "args": { + "External id": 518298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146998289.431, "dur": 2.573, + "args": { + "External id": 518299,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11418, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998361.409, "dur": 7.465, + "args": { + "External id": 518300,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998385.342, "dur": 35.279, + "args": { + "External id": 518301,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998432.091, "dur": 3.722, + "args": { + "External id": 518302,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998441.615, "dur": 12.194, + "args": { + "External id": 518303,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998459.572, "dur": 1.070, + "args": { + "External id": 518304,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998465.344, "dur": 11.666, + "args": { + "External id": 518305,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998482.871, "dur": 0.686, + "args": { + "External id": 518306,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998487.657, "dur": 9.989, + "args": { + "External id": 518307,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998502.191, "dur": 0.902, + "args": { + "External id": 518308,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998507.515, "dur": 11.318, + "args": { + "External id": 518309,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998522.935, "dur": 1.191, + "args": { + "External id": 518310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998528.126, "dur": 10.236, + "args": { + "External id": 518311,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998544.279, "dur": 0.701, + "args": { + "External id": 518312,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998549.436, "dur": 10.256, + "args": { + "External id": 518313,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998564.096, "dur": 0.863, + "args": { + "External id": 518314,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998569.172, "dur": 10.661, + "args": { + "External id": 518315,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998583.965, "dur": 0.890, + "args": { + "External id": 518316,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274146998607.340, "dur": 18.004, + "args": { + "External id": 518317,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146998738.171, "dur": 3138.989, + "args": { + "External id": 518318,"Record function id": 0, "Ev Idx": 11437 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274146998762.064, "dur": 1166.245, + "args": { + "External id": 518319,"Record function id": 0, "Ev Idx": 11438 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274146998779.712, "dur": 401.019, + "args": { + "External id": 518320,"Record function id": 0, "Ev Idx": 11439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998868.744, "dur": 6.438, + "args": { + "External id": 518321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998879.082, "dur": 0.825, + "args": { + "External id": 518322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998881.998, "dur": 1.174, + "args": { + "External id": 518323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998884.750, "dur": 0.872, + "args": { + "External id": 518324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998887.871, "dur": 0.638, + "args": { + "External id": 518325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998889.971, "dur": 0.893, + "args": { + "External id": 518326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998893.749, "dur": 0.803, + "args": { + "External id": 518327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998896.418, "dur": 1.944, + "args": { + "External id": 518328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998899.536, "dur": 2.838, + "args": { + "External id": 518329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274146998903.711, "dur": 0.571, + "args": { + "External id": 518330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146998933.649, "dur": 204.482, + "args": { + "External id": 518331,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146998954.769, "dur": 177.349, + "args": { + "External id": 518332,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146998972.371, "dur": 14.885, + "args": { + "External id": 518333,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274146998991.287, "dur": 106.096, + "args": { + "External id": 518334,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274146998994.114, "dur": 102.830, + "args": { + "External id": 518335,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146998998.136, "dur": 6.301, + "args": { + "External id": 518336,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146999006.295, "dur": 89.310, + "args": { + "External id": 518337,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338711, "tid": 2379440, + "ts": 6274146999275.379, "dur": 645.022, + "args": { + "External id": 518338,"Record function id": 0, "Ev Idx": 11457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274146999295.011, "dur": 612.073, + "args": { + "External id": 518339,"Record function id": 0, "Ev Idx": 11458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146999359.080, "dur": 6.045, + "args": { + "External id": 518340,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146999380.953, "dur": 30.595, + "args": { + "External id": 518341,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999386.938, "dur": 1.836, + "args": { + "External id": 518342,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999390.642, "dur": 0.306, + "args": { + "External id": 518343,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999392.904, "dur": 0.705, + "args": { + "External id": 518344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999395.292, "dur": 2.828, + "args": { + "External id": 518345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999399.275, "dur": 0.490, + "args": { + "External id": 518346,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999401.396, "dur": 0.363, + "args": { + "External id": 518347,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999403.522, "dur": 0.274, + "args": { + "External id": 518348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999405.180, "dur": 0.585, + "args": { + "External id": 518349,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999407.258, "dur": 0.343, + "args": { + "External id": 518350,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146999422.124, "dur": 43.059, + "args": { + "External id": 518351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2379440, + "ts": 6274146999495.765, "dur": 139.603, + "args": { + "External id": 518352,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274146999506.373, "dur": 3.294, + "args": { + "External id": 518353,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2379440, + "ts": 6274146999514.781, "dur": 10.580, + "args": { + "External id": 518354,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2379440, + "ts": 6274146999519.541, "dur": 5.393, + "args": { + "External id": 518355,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999523.099, "dur": 0.628, + "args": { + "External id": 518356,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2379440, + "ts": 6274146999532.012, "dur": 25.947, + "args": { + "External id": 518357,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999533.780, "dur": 2.423, + "args": { + "External id": 518358,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999538.014, "dur": 0.385, + "args": { + "External id": 518359,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999540.227, "dur": 0.304, + "args": { + "External id": 518360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999541.902, "dur": 0.652, + "args": { + "External id": 518361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999544.196, "dur": 0.509, + "args": { + "External id": 518362,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999546.415, "dur": 0.339, + "args": { + "External id": 518363,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999547.921, "dur": 0.721, + "args": { + "External id": 518364,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999550.341, "dur": 0.486, + "args": { + "External id": 518365,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274146999552.481, "dur": 2.830, + "args": { + "External id": 518366,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274146999572.174, "dur": 52.121, + "args": { + "External id": 518367,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274146999700.432, "dur": 125.747, + "args": { + "External id": 518368,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146999731.019, "dur": 91.418, + "args": { + "External id": 518369,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11488, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2379440, + "ts": 6274146999741.476, "dur": 76.251, + "args": { + "External id": 518370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274146999844.083, "dur": 1.835, + "args": { + "External id": 518371,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11490, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274146999936.712, "dur": 1920.184, + "args": { + "External id": 518372,"Sequence number": 5516243, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11491 + } + }, + { + "ph": "f", "id": 444, "pid": 2338711, "tid": 2379440, "ts": 6274146999936.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000100.778, "dur": 117.653, + "args": { + "External id": 518373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274147000265.517, "dur": 42.489, + "args": { + "External id": 518374,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000328.171, "dur": 49.683, + "args": { + "External id": 518375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000388.221, "dur": 31.658, + "args": { + "External id": 518376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000426.412, "dur": 33.767, + "args": { + "External id": 518377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000467.073, "dur": 28.970, + "args": { + "External id": 518378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000504.851, "dur": 31.665, + "args": { + "External id": 518379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274147000562.420, "dur": 41.726, + "args": { + "External id": 518380,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274147000632.246, "dur": 41.637, + "args": { + "External id": 518381,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274147000700.806, "dur": 22.078, + "args": { + "External id": 518382,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274147000736.673, "dur": 16.159, + "args": { + "External id": 518383,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000764.966, "dur": 47.752, + "args": { + "External id": 518384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147000816.812, "dur": 35.866, + "args": { + "External id": 518385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274147000884.102, "dur": 307.834, + "args": { + "External id": 518386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147000964.644, "dur": 6.697, + "args": { + "External id": 518387,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147000973.452, "dur": 3.600, + "args": { + "External id": 518388,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147000978.469, "dur": 2.860, + "args": { + "External id": 518389,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147000982.636, "dur": 4.364, + "args": { + "External id": 518390,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274147001035.892, "dur": 5.364, + "args": { + "External id": 518391,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147001037.844, "dur": 3.188, + "args": { + "External id": 518392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274147001043.016, "dur": 74.349, + "args": { + "External id": 518393,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147001049.106, "dur": 1.874, + "args": { + "External id": 518394,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274147001120.283, "dur": 2.645, + "args": { + "External id": 518395,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147001121.610, "dur": 1.218, + "args": { + "External id": 518396,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274147001124.353, "dur": 19.113, + "args": { + "External id": 518397,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147001127.087, "dur": 0.699, + "args": { + "External id": 518398,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274147001228.044, "dur": 44.630, + "args": { + "External id": 518399,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274147001299.406, "dur": 23.859, + "args": { + "External id": 518400,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147001337.481, "dur": 54.394, + "args": { + "External id": 518401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147001399.114, "dur": 42.213, + "args": { + "External id": 518402,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147001455.854, "dur": 23.089, + "args": { + "External id": 518403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147001485.943, "dur": 37.260, + "args": { + "External id": 518404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147001530.827, "dur": 31.711, + "args": { + "External id": 518405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147001569.878, "dur": 58.934, + "args": { + "External id": 518406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274147001656.786, "dur": 28.961, + "args": { + "External id": 518407,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274147001702.075, "dur": 25.751, + "args": { + "External id": 518408,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274147001742.541, "dur": 19.218, + "args": { + "External id": 518409,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274147001776.070, "dur": 16.440, + "args": { + "External id": 518410,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274147001805.356, "dur": 18.231, + "args": { + "External id": 518411,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001902.403, "dur": 16.565, + "args": { + "External id": 518412,"Record function id": 0, "Ev Idx": 11531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001906.152, "dur": 11.830, + "args": { + "External id": 518413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001910.774, "dur": 6.305, + "args": { + "External id": 518414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001912.509, "dur": 4.434, + "args": { + "External id": 518415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001922.753, "dur": 4.280, + "args": { + "External id": 518416,"Record function id": 0, "Ev Idx": 11535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001924.147, "dur": 2.445, + "args": { + "External id": 518417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001924.886, "dur": 1.216, + "args": { + "External id": 518418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001925.391, "dur": 0.620, + "args": { + "External id": 518419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001930.196, "dur": 4.662, + "args": { + "External id": 518420,"Record function id": 0, "Ev Idx": 11539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001932.007, "dur": 2.423, + "args": { + "External id": 518421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001932.583, "dur": 1.443, + "args": { + "External id": 518422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001933.314, "dur": 0.644, + "args": { + "External id": 518423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001938.039, "dur": 4.264, + "args": { + "External id": 518424,"Record function id": 0, "Ev Idx": 11543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001939.207, "dur": 2.657, + "args": { + "External id": 518425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001939.845, "dur": 1.628, + "args": { + "External id": 518426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001940.593, "dur": 0.753, + "args": { + "External id": 518427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001945.390, "dur": 3.912, + "args": { + "External id": 518428,"Record function id": 0, "Ev Idx": 11547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001946.598, "dur": 2.301, + "args": { + "External id": 518429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001947.247, "dur": 1.043, + "args": { + "External id": 518430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001947.554, "dur": 0.664, + "args": { + "External id": 518431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001952.412, "dur": 5.638, + "args": { + "External id": 518432,"Record function id": 0, "Ev Idx": 11551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001953.656, "dur": 3.957, + "args": { + "External id": 518433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001954.132, "dur": 3.054, + "args": { + "External id": 518434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001954.407, "dur": 2.692, + "args": { + "External id": 518435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001961.230, "dur": 4.316, + "args": { + "External id": 518436,"Record function id": 0, "Ev Idx": 11555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001962.439, "dur": 2.681, + "args": { + "External id": 518437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001963.052, "dur": 1.676, + "args": { + "External id": 518438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001963.948, "dur": 0.659, + "args": { + "External id": 518439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001969.018, "dur": 4.053, + "args": { + "External id": 518440,"Record function id": 0, "Ev Idx": 11559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001970.365, "dur": 2.245, + "args": { + "External id": 518441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001971.066, "dur": 1.094, + "args": { + "External id": 518442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001971.521, "dur": 0.524, + "args": { + "External id": 518443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001976.477, "dur": 3.670, + "args": { + "External id": 518444,"Record function id": 0, "Ev Idx": 11563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147001977.473, "dur": 2.244, + "args": { + "External id": 518445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001978.049, "dur": 1.011, + "args": { + "External id": 518446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147001978.310, "dur": 0.670, + "args": { + "External id": 518447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147001984.347, "dur": 66037.699, + "args": { + "External id": 518448,"Record function id": 0, "Sequence number": 5516242, "Fwd thread id": 1, "Ev Idx": 11567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147001985.576, "dur": 66026.920, + "args": { + "External id": 518449,"Sequence number": 5516242, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11568 + } + }, + { + "ph": "f", "id": 445, "pid": 2338711, "tid": 2379440, "ts": 6274147001985.576, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274147002019.696, "dur": 75.284, + "args": { + "External id": 518450,"Record function id": 0, "Ev Idx": 11569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274147002105.421, "dur": 74.743, + "args": { + "External id": 518451,"Record function id": 0, "Ev Idx": 11570 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338711, "tid": 2379440, + "ts": 6274147002186.254, "dur": 65817.375, + "args": { + "External id": 518452,"Record function id": 0, "Ev Idx": 11571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147002283.520, "dur": 9.019, + "args": { + "External id": 518453,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147002303.241, "dur": 5.403, + "args": { + "External id": 518454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274147002323.539, "dur": 64608.804, + "args": { + "External id": 518455,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274147002338.281, "dur": 64580.660, + "args": { + "External id": 518456,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147002440.162, "dur": 19.947, + "args": { + "External id": 518457,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274147002479.878, "dur": 64388.556, + "args": { + "External id": 518458,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274147002482.814, "dur": 64384.383, + "args": { + "External id": 518459,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147002487.658, "dur": 8.947, + "args": { + "External id": 518460,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274147002498.724, "dur": 64362.198, + "args": { + "External id": 518461,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147067088.167, "dur": 16.788, + "args": { + "External id": 518462,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147067092.645, "dur": 11.559, + "args": { + "External id": 518463,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067139.606, "dur": 421.490, + "args": { + "External id": 518464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274147067168.840, "dur": 386.594, + "args": { + "External id": 518465,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11584, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274147067181.442, "dur": 368.532, + "args": { + "External id": 518466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274147067582.550, "dur": 2.309, + "args": { + "External id": 518467,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11586, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067685.638, "dur": 8.378, + "args": { + "External id": 518468,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067709.316, "dur": 40.214, + "args": { + "External id": 518469,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067760.452, "dur": 1.907, + "args": { + "External id": 518470,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067775.110, "dur": 12.244, + "args": { + "External id": 518471,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067793.179, "dur": 1.130, + "args": { + "External id": 518472,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067798.757, "dur": 11.248, + "args": { + "External id": 518473,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067815.957, "dur": 0.907, + "args": { + "External id": 518474,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067821.294, "dur": 10.427, + "args": { + "External id": 518475,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067836.073, "dur": 3.164, + "args": { + "External id": 518476,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067843.148, "dur": 10.696, + "args": { + "External id": 518477,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067858.200, "dur": 0.750, + "args": { + "External id": 518478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067863.126, "dur": 9.598, + "args": { + "External id": 518479,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067877.955, "dur": 0.772, + "args": { + "External id": 518480,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067882.566, "dur": 10.588, + "args": { + "External id": 518481,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067897.759, "dur": 0.853, + "args": { + "External id": 518482,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067902.991, "dur": 9.716, + "args": { + "External id": 518483,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147067916.555, "dur": 0.755, + "args": { + "External id": 518484,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147067921.523, "dur": 10.501, + "args": { + "External id": 518485,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147068040.063, "dur": 2416.071, + "args": { + "External id": 518486,"Record function id": 0, "Ev Idx": 11605 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274147068098.691, "dur": 461.357, + "args": { + "External id": 518487,"Record function id": 0, "Ev Idx": 11606 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274147068117.663, "dur": 341.477, + "args": { + "External id": 518488,"Record function id": 0, "Ev Idx": 11607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068209.203, "dur": 5.209, + "args": { + "External id": 518489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068218.436, "dur": 1.263, + "args": { + "External id": 518490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068221.597, "dur": 1.054, + "args": { + "External id": 518491,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068224.396, "dur": 3.152, + "args": { + "External id": 518492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068229.359, "dur": 1.117, + "args": { + "External id": 518493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068233.491, "dur": 0.893, + "args": { + "External id": 518494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068236.892, "dur": 0.905, + "args": { + "External id": 518495,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068239.311, "dur": 2.097, + "args": { + "External id": 518496,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068243.000, "dur": 1.051, + "args": { + "External id": 518497,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147068247.538, "dur": 0.573, + "args": { + "External id": 518498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274147068267.809, "dur": 158.582, + "args": { + "External id": 518499,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274147068284.843, "dur": 136.525, + "args": { + "External id": 518500,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147068302.303, "dur": 14.606, + "args": { + "External id": 518501,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274147068321.202, "dur": 70.879, + "args": { + "External id": 518502,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274147068324.376, "dur": 67.268, + "args": { + "External id": 518503,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147068328.244, "dur": 8.104, + "args": { + "External id": 518504,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274147068338.395, "dur": 52.523, + "args": { + "External id": 518505,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147068567.757, "dur": 1865.163, + "args": { + "External id": 518506,"Sequence number": 5516241, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11625 + } + }, + { + "ph": "f", "id": 446, "pid": 2338711, "tid": 2379440, "ts": 6274147068567.757, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147068725.331, "dur": 115.004, + "args": { + "External id": 518507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338711, "tid": 2379440, + "ts": 6274147068885.016, "dur": 41.408, + "args": { + "External id": 518508,"kernel_hash": "ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/h2/ch27dzyhtsie4e455hrszbc5gfrankvrhmx6ktvliqv6zkafvkdx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338711, "tid": 2379440, + "ts": 6274147068946.910, "dur": 53.538, + "args": { + "External id": 518509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069014.967, "dur": 34.119, + "args": { + "External id": 518510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069097.318, "dur": 41.355, + "args": { + "External id": 518511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069148.007, "dur": 29.690, + "args": { + "External id": 518512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069186.828, "dur": 29.994, + "args": { + "External id": 518513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338711, "tid": 2379440, + "ts": 6274147069242.387, "dur": 26.826, + "args": { + "External id": 518514,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338711, "tid": 2379440, + "ts": 6274147069287.039, "dur": 30.756, + "args": { + "External id": 518515,"kernel_hash": "cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/cse3ju4lsxlbza5zt6yivcwggwqh2ddrlrw2aswu4mlotinsgzml.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274147069338.092, "dur": 19.124, + "args": { + "External id": 518516,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274147069370.734, "dur": 15.591, + "args": { + "External id": 518517,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069397.073, "dur": 40.159, + "args": { + "External id": 518518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069440.994, "dur": 38.027, + "args": { + "External id": 518519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338711, "tid": 2379440, + "ts": 6274147069509.380, "dur": 303.544, + "args": { + "External id": 518520,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147069615.271, "dur": 8.503, + "args": { + "External id": 518521,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147069627.062, "dur": 4.160, + "args": { + "External id": 518522,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147069633.554, "dur": 4.238, + "args": { + "External id": 518523,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147069639.528, "dur": 5.314, + "args": { + "External id": 518524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274147069699.592, "dur": 5.436, + "args": { + "External id": 518525,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147069701.658, "dur": 3.132, + "args": { + "External id": 518526,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274147069706.777, "dur": 34.232, + "args": { + "External id": 518527,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147069712.425, "dur": 2.140, + "args": { + "External id": 518528,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2379440, + "ts": 6274147069742.807, "dur": 1.218, + "args": { + "External id": 518529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147069743.474, "dur": 0.479, + "args": { + "External id": 518530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2379440, + "ts": 6274147069745.092, "dur": 16.791, + "args": { + "External id": 518531,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147069747.908, "dur": 0.601, + "args": { + "External id": 518532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2379440, + "ts": 6274147069850.183, "dur": 31.387, + "args": { + "External id": 518533,"kernel_hash": "c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/3w/c3w25fvwgrkopmjklnbclyjgwqku7uspayshwu6ue6cp2f4cxftn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2379440, + "ts": 6274147069898.700, "dur": 17.851, + "args": { + "External id": 518534,"kernel_hash": "chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/hd/chdnrspr3ah5omygjwyxd3ei2ypwtkjyl5cczytthapgc4e7icvb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069924.954, "dur": 53.716, + "args": { + "External id": 518535,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147069985.352, "dur": 42.502, + "args": { + "External id": 518536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147070038.807, "dur": 63.473, + "args": { + "External id": 518537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147070114.633, "dur": 37.303, + "args": { + "External id": 518538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147070159.666, "dur": 29.548, + "args": { + "External id": 518539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2379440, + "ts": 6274147070197.510, "dur": 31.029, + "args": { + "External id": 518540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338711, "tid": 2379440, + "ts": 6274147070248.144, "dur": 25.711, + "args": { + "External id": 518541,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338711, "tid": 2379440, + "ts": 6274147070288.884, "dur": 23.536, + "args": { + "External id": 518542,"kernel_hash": "c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/5i/c5iiz3thbcagbn7pj5phw7gvbnbegcrpujsfhdlrexovnjovsvqb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338711, "tid": 2379440, + "ts": 6274147070326.217, "dur": 17.206, + "args": { + "External id": 518543,"kernel_hash": "ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/k6/ck6sbxyc33id6ar2eixvfqjqw2q2c3fr6rkd2qyzetpxhtnrx2mx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338711, "tid": 2379440, + "ts": 6274147070356.912, "dur": 14.288, + "args": { + "External id": 518544,"kernel_hash": "cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ej/cejxpzmh7kkyjqiiq7m2kdedqhb4a4upr2bc2dtst5kmy2takwh4.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338711, "tid": 2379440, + "ts": 6274147070383.012, "dur": 15.774, + "args": { + "External id": 518545,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070480.124, "dur": 16.449, + "args": { + "External id": 518546,"Record function id": 0, "Ev Idx": 11665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070483.573, "dur": 11.933, + "args": { + "External id": 518547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070488.110, "dur": 6.426, + "args": { + "External id": 518548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070489.996, "dur": 4.420, + "args": { + "External id": 518549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070500.233, "dur": 5.130, + "args": { + "External id": 518550,"Record function id": 0, "Ev Idx": 11669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070502.097, "dur": 2.816, + "args": { + "External id": 518551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070502.736, "dur": 1.651, + "args": { + "External id": 518552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070503.557, "dur": 0.689, + "args": { + "External id": 518553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070508.483, "dur": 4.148, + "args": { + "External id": 518554,"Record function id": 0, "Ev Idx": 11673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070509.949, "dur": 2.286, + "args": { + "External id": 518555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070510.502, "dur": 1.264, + "args": { + "External id": 518556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070510.953, "dur": 0.742, + "args": { + "External id": 518557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070515.801, "dur": 4.318, + "args": { + "External id": 518558,"Record function id": 0, "Ev Idx": 11677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070517.355, "dur": 2.289, + "args": { + "External id": 518559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070517.893, "dur": 1.266, + "args": { + "External id": 518560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070518.344, "dur": 0.684, + "args": { + "External id": 518561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070523.163, "dur": 4.207, + "args": { + "External id": 518562,"Record function id": 0, "Ev Idx": 11681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070524.850, "dur": 2.109, + "args": { + "External id": 518563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070525.370, "dur": 1.116, + "args": { + "External id": 518564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070525.729, "dur": 0.671, + "args": { + "External id": 518565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070530.513, "dur": 4.259, + "args": { + "External id": 518566,"Record function id": 0, "Ev Idx": 11685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070532.075, "dur": 2.257, + "args": { + "External id": 518567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070532.633, "dur": 1.277, + "args": { + "External id": 518568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070533.051, "dur": 0.748, + "args": { + "External id": 518569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070537.882, "dur": 6.113, + "args": { + "External id": 518570,"Record function id": 0, "Ev Idx": 11689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070539.127, "dur": 4.429, + "args": { + "External id": 518571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070539.736, "dur": 3.367, + "args": { + "External id": 518572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070540.435, "dur": 2.544, + "args": { + "External id": 518573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070547.445, "dur": 4.176, + "args": { + "External id": 518574,"Record function id": 0, "Ev Idx": 11693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070548.913, "dur": 2.276, + "args": { + "External id": 518575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070549.372, "dur": 1.352, + "args": { + "External id": 518576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070549.809, "dur": 0.837, + "args": { + "External id": 518577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070554.701, "dur": 3.687, + "args": { + "External id": 518578,"Record function id": 0, "Ev Idx": 11697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147070556.013, "dur": 1.893, + "args": { + "External id": 518579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070556.468, "dur": 0.956, + "args": { + "External id": 518580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147070556.746, "dur": 0.610, + "args": { + "External id": 518581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147070562.384, "dur": 64858.263, + "args": { + "External id": 518582,"Record function id": 0, "Sequence number": 5516240, "Fwd thread id": 1, "Ev Idx": 11701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147070563.709, "dur": 64846.833, + "args": { + "External id": 518583,"Sequence number": 5516240, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11702 + } + }, + { + "ph": "f", "id": 447, "pid": 2338711, "tid": 2379440, "ts": 6274147070563.709, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274147070613.391, "dur": 66.238, + "args": { + "External id": 518584,"Record function id": 0, "Ev Idx": 11703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274147070691.889, "dur": 82.477, + "args": { + "External id": 518585,"Record function id": 0, "Ev Idx": 11704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338711, "tid": 2379440, + "ts": 6274147070780.424, "dur": 64622.276, + "args": { + "External id": 518586,"Record function id": 0, "Ev Idx": 11705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147070883.063, "dur": 8.813, + "args": { + "External id": 518587,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147070903.262, "dur": 5.467, + "args": { + "External id": 518588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274147070924.191, "dur": 63552.753, + "args": { + "External id": 518589,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274147070938.547, "dur": 63524.317, + "args": { + "External id": 518590,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147071038.106, "dur": 67.316, + "args": { + "External id": 518591,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274147071130.892, "dur": 63276.962, + "args": { + "External id": 518592,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274147071133.970, "dur": 63272.710, + "args": { + "External id": 518593,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147071140.078, "dur": 10.914, + "args": { + "External id": 518594,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274147071153.247, "dur": 63247.110, + "args": { + "External id": 518595,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147134621.110, "dur": 16.052, + "args": { + "External id": 518596,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147134626.077, "dur": 10.301, + "args": { + "External id": 518597,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274147134674.023, "dur": 309.583, + "args": { + "External id": 518598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274147134704.208, "dur": 274.444, + "args": { + "External id": 518599,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11718, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274147134717.622, "dur": 255.538, + "args": { + "External id": 518600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274147135003.049, "dur": 2.556, + "args": { + "External id": 518601,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11720, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135106.192, "dur": 7.642, + "args": { + "External id": 518602,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135126.525, "dur": 36.496, + "args": { + "External id": 518603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135174.573, "dur": 1.505, + "args": { + "External id": 518604,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135181.325, "dur": 12.383, + "args": { + "External id": 518605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135199.722, "dur": 0.992, + "args": { + "External id": 518606,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135205.178, "dur": 10.793, + "args": { + "External id": 518607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135221.075, "dur": 0.930, + "args": { + "External id": 518608,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135225.962, "dur": 9.939, + "args": { + "External id": 518609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135240.639, "dur": 3.083, + "args": { + "External id": 518610,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135248.182, "dur": 11.158, + "args": { + "External id": 518611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135263.455, "dur": 1.067, + "args": { + "External id": 518612,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135268.315, "dur": 10.243, + "args": { + "External id": 518613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135282.826, "dur": 0.771, + "args": { + "External id": 518614,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135287.979, "dur": 10.268, + "args": { + "External id": 518615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135302.569, "dur": 0.680, + "args": { + "External id": 518616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135307.107, "dur": 9.370, + "args": { + "External id": 518617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147135321.997, "dur": 0.805, + "args": { + "External id": 518618,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147135326.916, "dur": 10.780, + "args": { + "External id": 518619,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147135437.198, "dur": 335.622, + "args": { + "External id": 518620,"Record function id": 0, "Sequence number": 5516239, "Fwd thread id": 1, "Ev Idx": 11739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338711, "tid": 2379440, + "ts": 6274147135440.050, "dur": 322.929, + "args": { + "External id": 518621,"Sequence number": 5516239, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11740 + } + }, + { + "ph": "f", "id": 448, "pid": 2338711, "tid": 2379440, "ts": 6274147135440.050, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338711, "tid": 2379440, + "ts": 6274147135569.723, "dur": 67.851, + "args": { + "External id": 518622,"kernel_hash": "cqs2myue7aybil6hnmkybvk3zq47ei4vcoyzevvtnugjbrc4xs4z", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqs2myue7aybil6hnmkybvk3zq47ei4vcoyzevvtnugjbrc4xs4z.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 11741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338711, "tid": 2379440, + "ts": 6274147135661.398, "dur": 34.331, + "args": { + "External id": 518623,"kernel_hash": "c6cmd6xpiovdtzbdwl3f2ujlfsh5jockr6d4eebntgc5ux3biyal", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6c/c6cmd6xpiovdtzbdwl3f2ujlfsh5jockr6d4eebntgc5ux3biyal.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 11742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338711, "tid": 2379440, + "ts": 6274147135721.374, "dur": 22.537, + "args": { + "External id": 518624,"kernel_hash": "ccof7kftxamzmzcppmoshsaaubnjbbmiyvgppcoqslpagutqs5jr", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/co/ccof7kftxamzmzcppmoshsaaubnjbbmiyvgppcoqslpagutqs5jr.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 11743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147135790.786, "dur": 18.971, + "args": { + "External id": 518625,"Record function id": 0, "Ev Idx": 11744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338711, "tid": 2379440, + "ts": 6274147135794.095, "dur": 14.786, + "args": { + "External id": 518626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147135798.526, "dur": 9.340, + "args": { + "External id": 518627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2379440, + "ts": 6274147135800.563, "dur": 7.139, + "args": { + "External id": 518628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338711, "tid": 2379440, + "ts": 6274147135835.174, "dur": 14044.685, + "args": { + "External id": 518629,"Record function id": 0, "Ev Idx": 11748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338711, "tid": 2379440, + "ts": 6274147135863.706, "dur": 37.884, + "args": { + "External id": 518630,"Record function id": 0, "Ev Idx": 11749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338711, "tid": 2379440, + "ts": 6274147135908.630, "dur": 272.072, + "args": { + "External id": 518631,"Record function id": 0, "Ev Idx": 11750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338711, "tid": 2379440, + "ts": 6274147136190.063, "dur": 13428.823, + "args": { + "External id": 518632,"Record function id": 0, "Ev Idx": 11751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147136332.458, "dur": 8.608, + "args": { + "External id": 518633,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2379440, + "ts": 6274147136352.850, "dur": 5.674, + "args": { + "External id": 518634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274147136381.857, "dur": 11438.071, + "args": { + "External id": 518635,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338711, "tid": 2379440, + "ts": 6274147136409.978, "dur": 11394.535, + "args": { + "External id": 518636,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147137286.341, "dur": 26.414, + "args": { + "External id": 518637,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2379440, + "ts": 6274147137677.624, "dur": 10074.504, + "args": { + "External id": 518638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 11757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2379440, + "ts": 6274147137683.092, "dur": 10067.479, + "args": { + "External id": 518639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 11758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147137692.526, "dur": 22.868, + "args": { + "External id": 518640,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2379440, + "ts": 6274147137722.327, "dur": 10020.782, + "args": { + "External id": 518641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 11760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147147974.959, "dur": 14.221, + "args": { + "External id": 518642,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 11761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2379440, + "ts": 6274147147980.430, "dur": 8.344, + "args": { + "External id": 518643,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148021.051, "dur": 416.059, + "args": { + "External id": 518644,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 11763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274147148047.221, "dur": 384.171, + "args": { + "External id": 518645,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11764, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338711, "tid": 2379440, + "ts": 6274147148093.989, "dur": 331.117, + "args": { + "External id": 518646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 11765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2379440, + "ts": 6274147148459.903, "dur": 2.307, + "args": { + "External id": 518647,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11766, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148525.515, "dur": 7.190, + "args": { + "External id": 518648,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148545.907, "dur": 30.171, + "args": { + "External id": 518649,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148586.499, "dur": 21.527, + "args": { + "External id": 518650,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148618.755, "dur": 19.801, + "args": { + "External id": 518651,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148648.436, "dur": 1.469, + "args": { + "External id": 518652,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148656.616, "dur": 15.899, + "args": { + "External id": 518653,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148680.498, "dur": 1.657, + "args": { + "External id": 518654,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148688.396, "dur": 14.566, + "args": { + "External id": 518655,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148708.252, "dur": 1.047, + "args": { + "External id": 518656,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148759.703, "dur": 12.507, + "args": { + "External id": 518657,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148778.374, "dur": 1.063, + "args": { + "External id": 518658,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148785.768, "dur": 12.304, + "args": { + "External id": 518659,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148802.999, "dur": 2.891, + "args": { + "External id": 518660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148809.234, "dur": 12.441, + "args": { + "External id": 518661,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148826.551, "dur": 0.726, + "args": { + "External id": 518662,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148831.252, "dur": 11.756, + "args": { + "External id": 518663,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148847.237, "dur": 0.733, + "args": { + "External id": 518664,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148851.981, "dur": 11.974, + "args": { + "External id": 518665,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148869.690, "dur": 0.690, + "args": { + "External id": 518666,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148874.357, "dur": 11.000, + "args": { + "External id": 518667,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148890.019, "dur": 0.902, + "args": { + "External id": 518668,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148894.828, "dur": 12.985, + "args": { + "External id": 518669,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148912.080, "dur": 0.816, + "args": { + "External id": 518670,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148916.864, "dur": 11.419, + "args": { + "External id": 518671,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148932.712, "dur": 0.849, + "args": { + "External id": 518672,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148937.321, "dur": 12.629, + "args": { + "External id": 518673,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148954.891, "dur": 0.783, + "args": { + "External id": 518674,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148959.170, "dur": 11.741, + "args": { + "External id": 518675,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147148975.259, "dur": 2.718, + "args": { + "External id": 518676,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147148983.460, "dur": 12.108, + "args": { + "External id": 518677,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149000.525, "dur": 0.879, + "args": { + "External id": 518678,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149005.145, "dur": 12.594, + "args": { + "External id": 518679,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149022.915, "dur": 1.024, + "args": { + "External id": 518680,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149027.425, "dur": 12.472, + "args": { + "External id": 518681,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149044.951, "dur": 0.909, + "args": { + "External id": 518682,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149049.876, "dur": 44.689, + "args": { + "External id": 518683,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149105.601, "dur": 1.612, + "args": { + "External id": 518684,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149111.739, "dur": 15.062, + "args": { + "External id": 518685,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149131.896, "dur": 0.834, + "args": { + "External id": 518686,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149136.955, "dur": 11.433, + "args": { + "External id": 518687,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149153.146, "dur": 0.901, + "args": { + "External id": 518688,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149157.998, "dur": 13.017, + "args": { + "External id": 518689,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149175.986, "dur": 0.802, + "args": { + "External id": 518690,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149181.040, "dur": 10.673, + "args": { + "External id": 518691,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149195.781, "dur": 2.712, + "args": { + "External id": 518692,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149202.340, "dur": 12.609, + "args": { + "External id": 518693,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149219.068, "dur": 0.772, + "args": { + "External id": 518694,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149224.242, "dur": 11.551, + "args": { + "External id": 518695,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149240.050, "dur": 0.733, + "args": { + "External id": 518696,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149244.839, "dur": 12.015, + "args": { + "External id": 518697,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149262.537, "dur": 0.717, + "args": { + "External id": 518698,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149266.727, "dur": 10.244, + "args": { + "External id": 518699,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149281.071, "dur": 0.693, + "args": { + "External id": 518700,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149285.769, "dur": 10.299, + "args": { + "External id": 518701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149300.098, "dur": 0.622, + "args": { + "External id": 518702,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149304.397, "dur": 10.103, + "args": { + "External id": 518703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149318.650, "dur": 0.670, + "args": { + "External id": 518704,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149324.755, "dur": 10.832, + "args": { + "External id": 518705,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149340.061, "dur": 0.759, + "args": { + "External id": 518706,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149344.904, "dur": 9.533, + "args": { + "External id": 518707,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149359.031, "dur": 2.328, + "args": { + "External id": 518708,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149364.831, "dur": 10.340, + "args": { + "External id": 518709,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149380.764, "dur": 0.704, + "args": { + "External id": 518710,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149385.765, "dur": 9.877, + "args": { + "External id": 518711,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149399.811, "dur": 0.651, + "args": { + "External id": 518712,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149404.297, "dur": 10.618, + "args": { + "External id": 518713,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149418.811, "dur": 0.689, + "args": { + "External id": 518714,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149423.975, "dur": 9.362, + "args": { + "External id": 518715,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149437.839, "dur": 0.672, + "args": { + "External id": 518716,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149444.109, "dur": 10.367, + "args": { + "External id": 518717,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149458.626, "dur": 0.659, + "args": { + "External id": 518718,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149462.965, "dur": 9.909, + "args": { + "External id": 518719,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149477.085, "dur": 0.623, + "args": { + "External id": 518720,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149481.387, "dur": 9.729, + "args": { + "External id": 518721,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149494.756, "dur": 0.946, + "args": { + "External id": 518722,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149499.842, "dur": 9.594, + "args": { + "External id": 518723,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2379440, + "ts": 6274147149515.433, "dur": 2.983, + "args": { + "External id": 518724,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2379440, + "ts": 6274147149522.788, "dur": 11.159, + "args": { + "External id": 518725,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11844 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#11775", "pid": 2338711, "tid": 2338711, + "ts": 6274140629581.606, "dur": 6549085.778, + "args": { + "External id": 497665,"Record function id": 0, "Ev Idx": 11845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2338711, "tid": 2338711, + "ts": 6274140629638.382, "dur": 681.287, + "args": { + "External id": 497666,"Record function id": 0, "Ev Idx": 11846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338711, "tid": 2338711, + "ts": 6274140630394.566, "dur": 1964.975, + "args": { + "External id": 497667,"Record function id": 0, "Ev Idx": 11847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140631414.230, "dur": 9.067, + "args": { + "External id": 497668,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338711, "tid": 2338711, + "ts": 6274140631447.548, "dur": 7.307, + "args": { + "External id": 497669,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140631860.798, "dur": 2.487, + "args": { + "External id": 497670,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338711, "tid": 2338711, + "ts": 6274140631873.685, "dur": 2.657, + "args": { + "External id": 497671,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140632251.224, "dur": 1.875, + "args": { + "External id": 497672,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338711, "tid": 2338711, + "ts": 6274140632257.581, "dur": 2.036, + "args": { + "External id": 497673,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140632974.375, "dur": 16.038, + "args": { + "External id": 497674,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140632984.627, "dur": 2.148, + "args": { + "External id": 497675,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140632991.984, "dur": 4.420, + "args": { + "External id": 497676,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140632994.281, "dur": 1.000, + "args": { + "External id": 497677,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140633025.848, "dur": 402.948, + "args": { + "External id": 497678,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 11858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140633033.669, "dur": 394.704, + "args": { + "External id": 497679,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140633042.108, "dur": 39.753, + "args": { + "External id": 497680,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140633085.861, "dur": 341.168, + "args": { + "External id": 497681,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140633096.315, "dur": 0.422, + "args": { + "External id": 497682,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140633100.191, "dur": 9.551, + "args": { + "External id": 497683,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 11863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338711, "tid": 2338711, + "ts": 6274140633103.284, "dur": 6.281, + "args": { + "External id": 497684,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 11864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140633108.437, "dur": 0.851, + "args": { + "External id": 497685,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274140633112.019, "dur": 159.935, + "args": { + "External id": 497686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274140633114.382, "dur": 157.302, + "args": { + "External id": 497687,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140633117.457, "dur": 19.457, + "args": { + "External id": 497688,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 11868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140633119.741, "dur": 16.546, + "args": { + "External id": 497689,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140633137.829, "dur": 133.400, + "args": { + "External id": 497690,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140633273.605, "dur": 151.068, + "args": { + "External id": 497691,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140633444.684, "dur": 386.078, + "args": { + "External id": 497692,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 11872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140633446.400, "dur": 384.051, + "args": { + "External id": 497693,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 11873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140633451.965, "dur": 8.041, + "args": { + "External id": 497694,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140633461.150, "dur": 366.578, + "args": { + "External id": 497695,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 11875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338711, "tid": 2338711, + "ts": 6274140633858.507, "dur": 52.902, + "args": { + "External id": 497696,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140633863.189, "dur": 5.738, + "args": { + "External id": 497697,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338711, "tid": 2338711, + "ts": 6274140633871.911, "dur": 38.979, + "args": { + "External id": 497698,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 11878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274140633876.782, "dur": 8.207, + "args": { + "External id": 497699,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 11879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338711, "tid": 2338711, + "ts": 6274140633926.394, "dur": 72.922, + "args": { + "External id": 497700,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338711, "tid": 2338711, + "ts": 6274140633930.905, "dur": 8.364, + "args": { + "External id": 497701,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 11881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140633936.402, "dur": 2.574, + "args": { + "External id": 497702,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 11882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140633940.361, "dur": 2.886, + "args": { + "External id": 497703,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274140633946.116, "dur": 4.426, + "args": { + "External id": 497704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 11884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338711, "tid": 2338711, + "ts": 6274140633953.762, "dur": 7.226, + "args": { + "External id": 497705,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140633960.400, "dur": 0.404, + "args": { + "External id": 497706,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338711, "tid": 2338711, + "ts": 6274140633961.769, "dur": 5.296, + "args": { + "External id": 497707,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140633966.068, "dur": 0.893, + "args": { + "External id": 497708,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140633969.124, "dur": 4.527, + "args": { + "External id": 497709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 11889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338711, "tid": 2338711, + "ts": 6274140633970.581, "dur": 2.969, + "args": { + "External id": 497710,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 11890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140633972.575, "dur": 0.760, + "args": { + "External id": 497711,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 11891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140633974.595, "dur": 24.127, + "args": { + "External id": 497712,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 11892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140634007.435, "dur": 33.976, + "args": { + "External id": 497713,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140634009.049, "dur": 32.189, + "args": { + "External id": 497714,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634014.624, "dur": 3.192, + "args": { + "External id": 497715,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140634021.141, "dur": 19.663, + "args": { + "External id": 497716,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11896 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140634191.577, "dur": 150.208, + "args": { + "External id": 497717,"Record function id": 0, "Ev Idx": 11897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338711, "tid": 2338711, + "ts": 6274140634274.898, "dur": 56.343, + "args": { + "External id": 497718,"Record function id": 0, "Ev Idx": 11898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140634348.818, "dur": 44.672, + "args": { + "External id": 497719,"Record function id": 0, "Ev Idx": 11899 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140634401.548, "dur": 13500.644, + "args": { + "External id": 497720,"Record function id": 0, "Ev Idx": 11900 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338711, "tid": 2338711, + "ts": 6274140634411.497, "dur": 1432.207, + "args": { + "External id": 497721,"Record function id": 0, "Ev Idx": 11901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140634513.624, "dur": 7.652, + "args": { + "External id": 497722,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140634537.452, "dur": 164.339, + "args": { + "External id": 497723,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634542.950, "dur": 1.943, + "args": { + "External id": 497724,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634550.628, "dur": 0.344, + "args": { + "External id": 497725,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634551.650, "dur": 0.607, + "args": { + "External id": 497726,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634552.825, "dur": 2.131, + "args": { + "External id": 497727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634559.696, "dur": 0.441, + "args": { + "External id": 497728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634561.395, "dur": 0.392, + "args": { + "External id": 497729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634562.702, "dur": 2.373, + "args": { + "External id": 497730,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634568.755, "dur": 0.606, + "args": { + "External id": 497731,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634570.243, "dur": 0.408, + "args": { + "External id": 497732,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634580.272, "dur": 0.382, + "args": { + "External id": 497733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634581.531, "dur": 0.615, + "args": { + "External id": 497734,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634600.060, "dur": 3.239, + "args": { + "External id": 497735,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634608.967, "dur": 0.654, + "args": { + "External id": 497736,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634610.338, "dur": 0.568, + "args": { + "External id": 497737,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634611.670, "dur": 2.464, + "args": { + "External id": 497738,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634617.086, "dur": 0.371, + "args": { + "External id": 497739,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634618.362, "dur": 0.500, + "args": { + "External id": 497740,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634624.183, "dur": 0.386, + "args": { + "External id": 497741,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634625.553, "dur": 0.359, + "args": { + "External id": 497742,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634626.547, "dur": 2.408, + "args": { + "External id": 497743,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634633.010, "dur": 0.474, + "args": { + "External id": 497744,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634634.491, "dur": 0.376, + "args": { + "External id": 497745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634635.706, "dur": 2.362, + "args": { + "External id": 497746,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634640.443, "dur": 0.436, + "args": { + "External id": 497747,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634641.760, "dur": 0.539, + "args": { + "External id": 497748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634646.932, "dur": 0.264, + "args": { + "External id": 497749,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634647.857, "dur": 0.304, + "args": { + "External id": 497750,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634648.917, "dur": 2.657, + "args": { + "External id": 497751,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634655.470, "dur": 0.356, + "args": { + "External id": 497752,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634656.752, "dur": 0.302, + "args": { + "External id": 497753,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634657.964, "dur": 2.170, + "args": { + "External id": 497754,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634663.127, "dur": 0.400, + "args": { + "External id": 497755,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634664.577, "dur": 0.338, + "args": { + "External id": 497756,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634669.864, "dur": 0.317, + "args": { + "External id": 497757,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634671.289, "dur": 0.332, + "args": { + "External id": 497758,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634672.070, "dur": 2.544, + "args": { + "External id": 497759,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634678.699, "dur": 0.311, + "args": { + "External id": 497760,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634679.786, "dur": 0.412, + "args": { + "External id": 497761,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634681.273, "dur": 2.835, + "args": { + "External id": 497762,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140634727.941, "dur": 146.509, + "args": { + "External id": 497763,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140634942.904, "dur": 374.875, + "args": { + "External id": 497764,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "5", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140634958.368, "dur": 5.383, + "args": { + "External id": 497765,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140634971.442, "dur": 14.280, + "args": { + "External id": 497766,"Record function id": 0, "Concrete Inputs": ["", "0", "709122560", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140634976.110, "dur": 9.192, + "args": { + "External id": 497767,"Record function id": 0, "Concrete Inputs": ["", "0", "709122560", "850947072", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 11947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634981.748, "dur": 0.702, + "args": { + "External id": 497768,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "709122560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140634993.864, "dur": 162.343, + "args": { + "External id": 497769,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634996.263, "dur": 0.563, + "args": { + "External id": 497770,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "709122560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140634997.781, "dur": 0.520, + "args": { + "External id": 497771,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "725506560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635001.921, "dur": 2.416, + "args": { + "External id": 497772,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "725507072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635005.025, "dur": 0.940, + "args": { + "External id": 497773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "727604224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635009.064, "dur": 0.586, + "args": { + "External id": 497774,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "728128512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635010.634, "dur": 0.289, + "args": { + "External id": 497775,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "728652800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635012.410, "dur": 0.525, + "args": { + "External id": 497776,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "730749952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635015.775, "dur": 0.340, + "args": { + "External id": 497777,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "730750464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635016.808, "dur": 0.949, + "args": { + "External id": 497778,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "738090496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635018.477, "dur": 0.489, + "args": { + "External id": 497779,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "745430528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635021.831, "dur": 2.361, + "args": { + "External id": 497780,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "752770560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635025.309, "dur": 0.670, + "args": { + "External id": 497781,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "752771072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635029.068, "dur": 2.798, + "args": { + "External id": 497782,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "754868224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635032.548, "dur": 0.262, + "args": { + "External id": 497783,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "755392512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635033.700, "dur": 0.497, + "args": { + "External id": 497784,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "755916800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635039.606, "dur": 0.309, + "args": { + "External id": 497785,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "758013952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635040.763, "dur": 0.367, + "args": { + "External id": 497786,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "758014464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635041.875, "dur": 0.249, + "args": { + "External id": 497787,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "765354496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635047.059, "dur": 2.201, + "args": { + "External id": 497788,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "772694528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635050.072, "dur": 0.322, + "args": { + "External id": 497789,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "780034560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635087.630, "dur": 3.038, + "args": { + "External id": 497790,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "780035072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635092.486, "dur": 0.280, + "args": { + "External id": 497791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "782132224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635093.444, "dur": 0.564, + "args": { + "External id": 497792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "782656512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635099.123, "dur": 0.293, + "args": { + "External id": 497793,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "783180800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635100.541, "dur": 0.358, + "args": { + "External id": 497794,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "785277952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635101.907, "dur": 0.264, + "args": { + "External id": 497795,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "785278464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635107.362, "dur": 2.474, + "args": { + "External id": 497796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "792618496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635110.577, "dur": 0.361, + "args": { + "External id": 497797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "799958528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635114.326, "dur": 2.202, + "args": { + "External id": 497798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "807298560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635117.262, "dur": 0.310, + "args": { + "External id": 497799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "807299072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635118.263, "dur": 0.279, + "args": { + "External id": 497800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "809396224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635123.909, "dur": 0.389, + "args": { + "External id": 497801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "809920512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635124.754, "dur": 0.338, + "args": { + "External id": 497802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "810444800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635125.778, "dur": 0.330, + "args": { + "External id": 497803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "812541952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635131.634, "dur": 2.417, + "args": { + "External id": 497804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "812542464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635134.531, "dur": 0.256, + "args": { + "External id": 497805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "819882496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635137.742, "dur": 2.134, + "args": { + "External id": 497806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "827222528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635140.378, "dur": 0.276, + "args": { + "External id": 497807,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "834562560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140635141.436, "dur": 0.316, + "args": { + "External id": 497808,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "834563072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140635181.099, "dur": 120.618, + "args": { + "External id": 497809,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140635377.543, "dur": 351.376, + "args": { + "External id": 497810,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 11990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140635419.473, "dur": 303.426, + "args": { + "External id": 497811,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11991, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140635430.571, "dur": 286.136, + "args": { + "External id": 497812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 11992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140635755.613, "dur": 2.457, + "args": { + "External id": 497813,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11993, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338711, "tid": 2338711, + "ts": 6274140635859.769, "dur": 11859.061, + "args": { + "External id": 497814,"Record function id": 0, "Ev Idx": 11994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636042.492, "dur": 6.727, + "args": { + "External id": 497815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636053.121, "dur": 1.353, + "args": { + "External id": 497816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 11996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636091.303, "dur": 1.442, + "args": { + "External id": 497817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636096.068, "dur": 3.121, + "args": { + "External id": 497818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636100.609, "dur": 0.635, + "args": { + "External id": 497819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636105.573, "dur": 0.969, + "args": { + "External id": 497820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636108.380, "dur": 0.840, + "args": { + "External id": 497821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636110.581, "dur": 2.469, + "args": { + "External id": 497822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636116.580, "dur": 0.830, + "args": { + "External id": 497823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636120.896, "dur": 0.942, + "args": { + "External id": 497824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636123.446, "dur": 0.670, + "args": { + "External id": 497825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636125.802, "dur": 2.825, + "args": { + "External id": 497826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636129.827, "dur": 0.984, + "args": { + "External id": 497827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636134.812, "dur": 0.957, + "args": { + "External id": 497828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636137.029, "dur": 1.093, + "args": { + "External id": 497829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636139.434, "dur": 2.076, + "args": { + "External id": 497830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636144.507, "dur": 0.622, + "args": { + "External id": 497831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636149.118, "dur": 0.948, + "args": { + "External id": 497832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636151.277, "dur": 0.922, + "args": { + "External id": 497833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636153.411, "dur": 2.346, + "args": { + "External id": 497834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636157.177, "dur": 0.883, + "args": { + "External id": 497835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636161.994, "dur": 0.766, + "args": { + "External id": 497836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636163.963, "dur": 0.795, + "args": { + "External id": 497837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636165.953, "dur": 2.394, + "args": { + "External id": 497838,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636192.990, "dur": 0.653, + "args": { + "External id": 497839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636197.494, "dur": 0.813, + "args": { + "External id": 497840,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636199.612, "dur": 0.784, + "args": { + "External id": 497841,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636201.722, "dur": 2.461, + "args": { + "External id": 497842,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636205.274, "dur": 0.743, + "args": { + "External id": 497843,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636209.127, "dur": 1.060, + "args": { + "External id": 497844,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636211.780, "dur": 0.568, + "args": { + "External id": 497845,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636213.541, "dur": 2.492, + "args": { + "External id": 497846,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636219.457, "dur": 0.556, + "args": { + "External id": 497847,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636223.759, "dur": 0.752, + "args": { + "External id": 497848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636225.772, "dur": 0.817, + "args": { + "External id": 497849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636227.971, "dur": 2.529, + "args": { + "External id": 497850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636231.856, "dur": 1.167, + "args": { + "External id": 497851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636236.280, "dur": 0.823, + "args": { + "External id": 497852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636238.516, "dur": 0.967, + "args": { + "External id": 497853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140636240.949, "dur": 2.080, + "args": { + "External id": 497854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 12034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140636272.379, "dur": 11387.114, + "args": { + "External id": 497855,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140636298.443, "dur": 11350.583, + "args": { + "External id": 497856,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140636323.913, "dur": 5.528, + "args": { + "External id": 497857,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140636335.371, "dur": 11270.218, + "args": { + "External id": 497858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 12038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140636337.936, "dur": 11266.907, + "args": { + "External id": 497859,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 12039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140636345.660, "dur": 6.546, + "args": { + "External id": 497860,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140636353.850, "dur": 11247.275, + "args": { + "External id": 497861,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 12041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140647986.268, "dur": 35.905, + "args": { + "External id": 497862,"Record function id": 0, "Ev Idx": 12042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338711, "tid": 2338711, + "ts": 6274140648023.373, "dur": 250.343, + "args": { + "External id": 497863,"Record function id": 0, "Ev Idx": 12043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140648094.448, "dur": 169.428, + "args": { + "External id": 497864,"Sequence number": 5516010, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 12044 + } + }, + { + "ph": "s", "id": 224, "pid": 2338711, "tid": 2338711, "ts": 6274140648094.448, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140648174.479, "dur": 52.803, + "args": { + "External id": 497865,"kernel_hash": "cwgoxjzj7t5qtecsjcfbkrl7ttzfg44hx3z6lth7z6syiunc2ng5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wg/cwgoxjzj7t5qtecsjcfbkrl7ttzfg44hx3z6lth7z6syiunc2ng5.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 12045 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140648339.552, "dur": 56.565, + "args": { + "External id": 497866,"Record function id": 0, "Ev Idx": 12046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274140648407.382, "dur": 8619.211, + "args": { + "External id": 497867,"Record function id": 0, "Ev Idx": 12047 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274140648416.208, "dur": 867.597, + "args": { + "External id": 497868,"Record function id": 0, "Ev Idx": 12048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140648488.543, "dur": 11.833, + "args": { + "External id": 497869,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140648512.879, "dur": 46.586, + "args": { + "External id": 497870,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648523.686, "dur": 2.320, + "args": { + "External id": 497871,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648527.992, "dur": 0.602, + "args": { + "External id": 497872,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648529.588, "dur": 2.914, + "args": { + "External id": 497873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648535.785, "dur": 0.294, + "args": { + "External id": 497874,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648536.825, "dur": 0.331, + "args": { + "External id": 497875,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648543.519, "dur": 0.294, + "args": { + "External id": 497876,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648544.649, "dur": 0.321, + "args": { + "External id": 497877,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648546.035, "dur": 2.761, + "args": { + "External id": 497878,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648552.159, "dur": 0.326, + "args": { + "External id": 497879,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140648569.666, "dur": 90.154, + "args": { + "External id": 497880,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140648695.575, "dur": 133.817, + "args": { + "External id": 497881,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140648707.218, "dur": 6.659, + "args": { + "External id": 497882,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140648719.472, "dur": 13.120, + "args": { + "External id": 497883,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140648724.122, "dur": 8.058, + "args": { + "External id": 497884,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648729.910, "dur": 0.684, + "args": { + "External id": 497885,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140648739.372, "dur": 33.068, + "args": { + "External id": 497886,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648741.060, "dur": 0.325, + "args": { + "External id": 497887,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648745.520, "dur": 0.489, + "args": { + "External id": 497888,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648749.162, "dur": 0.512, + "args": { + "External id": 497889,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648752.781, "dur": 0.510, + "args": { + "External id": 497890,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648753.975, "dur": 4.350, + "args": { + "External id": 497891,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648759.187, "dur": 0.287, + "args": { + "External id": 497892,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648762.680, "dur": 0.465, + "args": { + "External id": 497893,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648766.342, "dur": 0.487, + "args": { + "External id": 497894,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140648767.500, "dur": 0.293, + "args": { + "External id": 497895,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140648785.224, "dur": 34.381, + "args": { + "External id": 497896,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140648881.549, "dur": 302.248, + "args": { + "External id": 497897,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140648912.747, "dur": 265.611, + "args": { + "External id": 497898,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12078, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140648922.587, "dur": 249.653, + "args": { + "External id": 497899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140649210.451, "dur": 2.545, + "args": { + "External id": 497900,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12080, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274140649305.572, "dur": 7521.489, + "args": { + "External id": 497901,"Record function id": 0, "Ev Idx": 12081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649401.379, "dur": 6.876, + "args": { + "External id": 497902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649411.647, "dur": 1.145, + "args": { + "External id": 497903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649414.808, "dur": 0.946, + "args": { + "External id": 497904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649419.334, "dur": 3.542, + "args": { + "External id": 497905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649424.486, "dur": 1.143, + "args": { + "External id": 497906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649426.890, "dur": 0.908, + "args": { + "External id": 497907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649429.479, "dur": 0.861, + "args": { + "External id": 497908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649436.571, "dur": 2.138, + "args": { + "External id": 497909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649439.890, "dur": 1.277, + "args": { + "External id": 497910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140649442.367, "dur": 0.672, + "args": { + "External id": 497911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140649460.010, "dur": 7325.666, + "args": { + "External id": 497912,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140649478.637, "dur": 7299.731, + "args": { + "External id": 497913,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140649507.879, "dur": 14.390, + "args": { + "External id": 497914,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140649528.342, "dur": 7214.563, + "args": { + "External id": 497915,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140649531.368, "dur": 7210.955, + "args": { + "External id": 497916,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140649537.046, "dur": 7.159, + "args": { + "External id": 497917,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140649546.278, "dur": 7192.816, + "args": { + "External id": 497918,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140656970.007, "dur": 31.020, + "args": { + "External id": 497919,"Sequence number": 5516011, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12099 + } + }, + { + "ph": "s", "id": 223, "pid": 2338711, "tid": 2338711, "ts": 6274140656970.007, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140656986.320, "dur": 10.089, + "args": { + "External id": 497920,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140656992.086, "dur": 4.134, + "args": { + "External id": 497921,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140657130.014, "dur": 91.401, + "args": { + "External id": 497922,"Record function id": 0, "Ev Idx": 12102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140657223.218, "dur": 1212.586, + "args": { + "External id": 497923,"Record function id": 0, "Ev Idx": 12103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140657264.321, "dur": 1157.681, + "args": { + "External id": 497924,"Sequence number": 5516012, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12104 + } + }, + { + "ph": "s", "id": 222, "pid": 2338711, "tid": 2338711, "ts": 6274140657264.321, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140657337.931, "dur": 52.643, + "args": { + "External id": 497925,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140657404.036, "dur": 108.962, + "args": { + "External id": 497926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140657525.949, "dur": 39.442, + "args": { + "External id": 497927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140657574.381, "dur": 58.229, + "args": { + "External id": 497928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140657664.365, "dur": 28.733, + "args": { + "External id": 497929,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140657717.855, "dur": 19.701, + "args": { + "External id": 497930,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140657762.661, "dur": 134.144, + "args": { + "External id": 497931,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140657814.084, "dur": 12.708, + "args": { + "External id": 497932,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140657819.042, "dur": 6.970, + "args": { + "External id": 497933,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140657829.283, "dur": 3.637, + "args": { + "External id": 497934,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140657836.756, "dur": 0.908, + "args": { + "External id": 497935,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140657840.578, "dur": 3.189, + "args": { + "External id": 497936,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140657907.612, "dur": 52.232, + "args": { + "External id": 497937,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140657995.141, "dur": 31.206, + "args": { + "External id": 497938,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140658033.408, "dur": 109.218, + "args": { + "External id": 497939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140658155.936, "dur": 39.486, + "args": { + "External id": 497940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140658218.078, "dur": 27.491, + "args": { + "External id": 497941,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140658253.874, "dur": 39.558, + "args": { + "External id": 497942,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140658320.428, "dur": 21.793, + "args": { + "External id": 497943,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12123 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274140658503.910, "dur": 118.233, + "args": { + "External id": 497944,"Record function id": 0, "Ev Idx": 12124 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140658699.962, "dur": 47.050, + "args": { + "External id": 497945,"Record function id": 0, "Ev Idx": 12125 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274140658755.569, "dur": 29055.832, + "args": { + "External id": 497946,"Record function id": 0, "Ev Idx": 12126 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274140658764.897, "dur": 1017.451, + "args": { + "External id": 497947,"Record function id": 0, "Ev Idx": 12127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140658845.035, "dur": 9.808, + "args": { + "External id": 497948,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140658867.051, "dur": 47.115, + "args": { + "External id": 497949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658875.090, "dur": 4.171, + "args": { + "External id": 497950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658880.980, "dur": 0.697, + "args": { + "External id": 497951,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658885.541, "dur": 0.356, + "args": { + "External id": 497952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658889.412, "dur": 0.610, + "args": { + "External id": 497953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658891.139, "dur": 2.367, + "args": { + "External id": 497954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658896.993, "dur": 0.269, + "args": { + "External id": 497955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658900.590, "dur": 0.543, + "args": { + "External id": 497956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658902.013, "dur": 0.298, + "args": { + "External id": 497957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140658904.857, "dur": 2.458, + "args": { + "External id": 497958,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140658924.721, "dur": 57.003, + "args": { + "External id": 497959,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140659016.645, "dur": 207.972, + "args": { + "External id": 497960,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140659032.096, "dur": 3.781, + "args": { + "External id": 497961,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140659041.339, "dur": 10.144, + "args": { + "External id": 497962,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140659046.073, "dur": 5.047, + "args": { + "External id": 497963,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659049.082, "dur": 0.858, + "args": { + "External id": 497964,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140659121.897, "dur": 37.987, + "args": { + "External id": 497965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659126.883, "dur": 0.730, + "args": { + "External id": 497966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659129.249, "dur": 2.760, + "args": { + "External id": 497967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659133.307, "dur": 2.663, + "args": { + "External id": 497968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659139.467, "dur": 0.331, + "args": { + "External id": 497969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659140.697, "dur": 0.575, + "args": { + "External id": 497970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659143.593, "dur": 0.333, + "args": { + "External id": 497971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659147.209, "dur": 0.438, + "args": { + "External id": 497972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659148.407, "dur": 0.460, + "args": { + "External id": 497973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140659154.317, "dur": 0.467, + "args": { + "External id": 497974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140659175.064, "dur": 38.732, + "args": { + "External id": 497975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140659281.700, "dur": 399.994, + "args": { + "External id": 497976,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140659316.375, "dur": 359.153, + "args": { + "External id": 497977,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12157, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140659327.094, "dur": 340.271, + "args": { + "External id": 497978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140659706.298, "dur": 2.918, + "args": { + "External id": 497979,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12159, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274140659804.354, "dur": 27797.126, + "args": { + "External id": 497980,"Record function id": 0, "Ev Idx": 12160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659899.282, "dur": 6.428, + "args": { + "External id": 497981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659909.636, "dur": 1.167, + "args": { + "External id": 497982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659912.583, "dur": 0.842, + "args": { + "External id": 497983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659915.472, "dur": 1.016, + "args": { + "External id": 497984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659917.793, "dur": 1.357, + "args": { + "External id": 497985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659922.832, "dur": 1.154, + "args": { + "External id": 497986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659927.744, "dur": 1.161, + "args": { + "External id": 497987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659930.380, "dur": 4.436, + "args": { + "External id": 497988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659936.177, "dur": 1.060, + "args": { + "External id": 497989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140659941.430, "dur": 0.900, + "args": { + "External id": 497990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140659959.817, "dur": 27571.919, + "args": { + "External id": 497991,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140659978.183, "dur": 27542.678, + "args": { + "External id": 497992,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140660032.571, "dur": 14.531, + "args": { + "External id": 497993,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140660050.891, "dur": 27428.703, + "args": { + "External id": 497994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140660096.982, "dur": 27381.717, + "args": { + "External id": 497995,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140660103.384, "dur": 7.922, + "args": { + "External id": 497996,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140660113.412, "dur": 27361.626, + "args": { + "External id": 497997,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140687750.674, "dur": 35.803, + "args": { + "External id": 497998,"Sequence number": 5516013, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12178 + } + }, + { + "ph": "s", "id": 221, "pid": 2338711, "tid": 2338711, "ts": 6274140687750.674, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140687767.921, "dur": 13.517, + "args": { + "External id": 497999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140687775.253, "dur": 5.834, + "args": { + "External id": 498000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140687851.774, "dur": 75.134, + "args": { + "External id": 498001,"Record function id": 0, "Ev Idx": 12181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140687928.230, "dur": 1187.208, + "args": { + "External id": 498002,"Record function id": 0, "Ev Idx": 12182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140687963.586, "dur": 1136.163, + "args": { + "External id": 498003,"Sequence number": 5516014, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12183 + } + }, + { + "ph": "s", "id": 220, "pid": 2338711, "tid": 2338711, "ts": 6274140687963.586, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140688040.933, "dur": 86.923, + "args": { + "External id": 498004,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140688149.436, "dur": 108.624, + "args": { + "External id": 498005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140688268.813, "dur": 37.318, + "args": { + "External id": 498006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140688312.627, "dur": 29.468, + "args": { + "External id": 498007,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140688376.258, "dur": 25.507, + "args": { + "External id": 498008,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140688425.526, "dur": 21.556, + "args": { + "External id": 498009,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140688466.370, "dur": 166.970, + "args": { + "External id": 498010,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140688517.406, "dur": 12.377, + "args": { + "External id": 498011,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140688523.018, "dur": 5.839, + "args": { + "External id": 498012,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140688534.953, "dur": 6.005, + "args": { + "External id": 498013,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140688542.362, "dur": 3.194, + "args": { + "External id": 498014,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140688547.855, "dur": 2.858, + "args": { + "External id": 498015,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140688646.531, "dur": 52.700, + "args": { + "External id": 498016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140688732.503, "dur": 29.120, + "args": { + "External id": 498017,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140688771.978, "dur": 42.030, + "args": { + "External id": 498018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140688821.655, "dur": 34.901, + "args": { + "External id": 498019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140688878.824, "dur": 24.021, + "args": { + "External id": 498020,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140688908.878, "dur": 35.224, + "args": { + "External id": 498021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140688964.432, "dur": 20.291, + "args": { + "External id": 498022,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12202 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274140689186.497, "dur": 83.450, + "args": { + "External id": 498023,"Record function id": 0, "Ev Idx": 12203 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140689344.946, "dur": 47.067, + "args": { + "External id": 498024,"Record function id": 0, "Ev Idx": 12204 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274140689400.949, "dur": 29343.340, + "args": { + "External id": 498025,"Record function id": 0, "Ev Idx": 12205 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274140689411.722, "dur": 970.961, + "args": { + "External id": 498026,"Record function id": 0, "Ev Idx": 12206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140689492.326, "dur": 9.765, + "args": { + "External id": 498027,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140689517.073, "dur": 44.907, + "args": { + "External id": 498028,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689522.889, "dur": 2.750, + "args": { + "External id": 498029,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689530.012, "dur": 0.644, + "args": { + "External id": 498030,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689534.020, "dur": 0.525, + "args": { + "External id": 498031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689535.742, "dur": 0.370, + "args": { + "External id": 498032,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689539.396, "dur": 0.580, + "args": { + "External id": 498033,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689543.537, "dur": 0.555, + "args": { + "External id": 498034,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689544.819, "dur": 4.928, + "args": { + "External id": 498035,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689552.684, "dur": 0.565, + "args": { + "External id": 498036,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689554.073, "dur": 0.501, + "args": { + "External id": 498037,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140689573.197, "dur": 82.538, + "args": { + "External id": 498038,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140689695.527, "dur": 130.014, + "args": { + "External id": 498039,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140689708.901, "dur": 5.125, + "args": { + "External id": 498040,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140689719.281, "dur": 10.848, + "args": { + "External id": 498041,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140689723.980, "dur": 5.767, + "args": { + "External id": 498042,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689727.431, "dur": 0.761, + "args": { + "External id": 498043,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140689737.254, "dur": 35.122, + "args": { + "External id": 498044,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689741.114, "dur": 2.899, + "args": { + "External id": 498045,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689745.179, "dur": 0.490, + "args": { + "External id": 498046,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689746.365, "dur": 0.529, + "args": { + "External id": 498047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689752.118, "dur": 2.284, + "args": { + "External id": 498048,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689755.234, "dur": 0.571, + "args": { + "External id": 498049,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689759.160, "dur": 0.422, + "args": { + "External id": 498050,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689761.165, "dur": 0.371, + "args": { + "External id": 498051,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689762.502, "dur": 0.548, + "args": { + "External id": 498052,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140689766.400, "dur": 0.982, + "args": { + "External id": 498053,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140689783.038, "dur": 34.421, + "args": { + "External id": 498054,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140689880.279, "dur": 398.505, + "args": { + "External id": 498055,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140689915.081, "dur": 357.603, + "args": { + "External id": 498056,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12236, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140689926.190, "dur": 340.641, + "args": { + "External id": 498057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140690306.914, "dur": 2.379, + "args": { + "External id": 498058,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12238, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274140690404.667, "dur": 28134.220, + "args": { + "External id": 498059,"Record function id": 0, "Ev Idx": 12239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690504.625, "dur": 6.219, + "args": { + "External id": 498060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690514.463, "dur": 1.304, + "args": { + "External id": 498061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690517.608, "dur": 3.461, + "args": { + "External id": 498062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690522.931, "dur": 0.755, + "args": { + "External id": 498063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690525.119, "dur": 1.267, + "args": { + "External id": 498064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690528.022, "dur": 0.679, + "args": { + "External id": 498065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690532.960, "dur": 0.701, + "args": { + "External id": 498066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690535.684, "dur": 2.523, + "args": { + "External id": 498067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690539.830, "dur": 1.006, + "args": { + "External id": 498068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140690542.507, "dur": 0.650, + "args": { + "External id": 498069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140690565.062, "dur": 27928.334, + "args": { + "External id": 498070,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140690579.878, "dur": 27905.180, + "args": { + "External id": 498071,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140690618.434, "dur": 17.226, + "args": { + "External id": 498072,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140690639.318, "dur": 27809.216, + "args": { + "External id": 498073,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140690642.448, "dur": 27805.564, + "args": { + "External id": 498074,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140690648.649, "dur": 5.714, + "args": { + "External id": 498075,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140690656.393, "dur": 27788.620, + "args": { + "External id": 498076,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140718686.334, "dur": 32.349, + "args": { + "External id": 498077,"Sequence number": 5516015, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12257 + } + }, + { + "ph": "s", "id": 219, "pid": 2338711, "tid": 2338711, "ts": 6274140718686.334, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140718704.994, "dur": 8.943, + "args": { + "External id": 498078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140718708.849, "dur": 4.709, + "args": { + "External id": 498079,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140718785.453, "dur": 72.529, + "args": { + "External id": 498080,"Record function id": 0, "Ev Idx": 12260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140718859.400, "dur": 1127.765, + "args": { + "External id": 498081,"Record function id": 0, "Ev Idx": 12261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140718899.349, "dur": 1073.127, + "args": { + "External id": 498082,"Sequence number": 5516016, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12262 + } + }, + { + "ph": "s", "id": 218, "pid": 2338711, "tid": 2338711, "ts": 6274140718899.349, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140718967.430, "dur": 52.804, + "args": { + "External id": 498083,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140719032.424, "dur": 136.781, + "args": { + "External id": 498084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140719185.655, "dur": 38.946, + "args": { + "External id": 498085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140719235.319, "dur": 29.600, + "args": { + "External id": 498086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140719291.872, "dur": 29.003, + "args": { + "External id": 498087,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140719341.504, "dur": 18.731, + "args": { + "External id": 498088,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140719384.511, "dur": 136.683, + "args": { + "External id": 498089,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140719436.684, "dur": 13.852, + "args": { + "External id": 498090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140719444.449, "dur": 5.264, + "args": { + "External id": 498091,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140719453.327, "dur": 5.026, + "args": { + "External id": 498092,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140719459.547, "dur": 1.344, + "args": { + "External id": 498093,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140719463.823, "dur": 5.521, + "args": { + "External id": 498094,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140719532.212, "dur": 45.692, + "args": { + "External id": 498095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140719631.619, "dur": 31.397, + "args": { + "External id": 498096,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140719674.229, "dur": 45.034, + "args": { + "External id": 498097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140719728.288, "dur": 34.804, + "args": { + "External id": 498098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140719785.014, "dur": 26.022, + "args": { + "External id": 498099,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140719819.577, "dur": 35.072, + "args": { + "External id": 498100,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140719876.273, "dur": 17.719, + "args": { + "External id": 498101,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274140720093.816, "dur": 84.222, + "args": { + "External id": 498102,"Record function id": 0, "Ev Idx": 12282 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140720257.109, "dur": 48.085, + "args": { + "External id": 498103,"Record function id": 0, "Ev Idx": 12283 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274140720314.708, "dur": 31393.464, + "args": { + "External id": 498104,"Record function id": 0, "Ev Idx": 12284 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274140720325.776, "dur": 987.445, + "args": { + "External id": 498105,"Record function id": 0, "Ev Idx": 12285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140720406.780, "dur": 10.119, + "args": { + "External id": 498106,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140720429.665, "dur": 46.419, + "args": { + "External id": 498107,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720435.266, "dur": 2.360, + "args": { + "External id": 498108,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720444.724, "dur": 0.666, + "args": { + "External id": 498109,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720446.278, "dur": 0.538, + "args": { + "External id": 498110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720452.764, "dur": 0.396, + "args": { + "External id": 498111,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720459.042, "dur": 0.589, + "args": { + "External id": 498112,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720460.520, "dur": 0.502, + "args": { + "External id": 498113,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720463.715, "dur": 3.145, + "args": { + "External id": 498114,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720467.760, "dur": 0.572, + "args": { + "External id": 498115,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720469.015, "dur": 0.529, + "args": { + "External id": 498116,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140720490.536, "dur": 56.293, + "args": { + "External id": 498117,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140720580.996, "dur": 170.744, + "args": { + "External id": 498118,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140720611.628, "dur": 5.776, + "args": { + "External id": 498119,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140720623.162, "dur": 11.338, + "args": { + "External id": 498120,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140720628.062, "dur": 6.033, + "args": { + "External id": 498121,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720631.572, "dur": 0.962, + "args": { + "External id": 498122,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140720643.569, "dur": 37.706, + "args": { + "External id": 498123,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720645.456, "dur": 3.080, + "args": { + "External id": 498124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720649.507, "dur": 0.865, + "args": { + "External id": 498125,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720653.877, "dur": 0.580, + "args": { + "External id": 498126,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720657.443, "dur": 2.143, + "args": { + "External id": 498127,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720663.017, "dur": 0.449, + "args": { + "External id": 498128,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720664.400, "dur": 0.462, + "args": { + "External id": 498129,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720667.774, "dur": 0.479, + "args": { + "External id": 498130,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720671.464, "dur": 0.516, + "args": { + "External id": 498131,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140720672.969, "dur": 2.931, + "args": { + "External id": 498132,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140720702.909, "dur": 40.047, + "args": { + "External id": 498133,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140720808.889, "dur": 397.853, + "args": { + "External id": 498134,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140720846.393, "dur": 354.046, + "args": { + "External id": 498135,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12315, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140720860.473, "dur": 333.922, + "args": { + "External id": 498136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140721234.961, "dur": 2.536, + "args": { + "External id": 498137,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12317, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274140721335.515, "dur": 30157.513, + "args": { + "External id": 498138,"Record function id": 0, "Ev Idx": 12318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721436.148, "dur": 6.412, + "args": { + "External id": 498139,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721446.331, "dur": 1.313, + "args": { + "External id": 498140,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721449.574, "dur": 3.756, + "args": { + "External id": 498141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721455.183, "dur": 1.391, + "args": { + "External id": 498142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721457.924, "dur": 0.978, + "args": { + "External id": 498143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721460.236, "dur": 0.925, + "args": { + "External id": 498144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721465.301, "dur": 0.649, + "args": { + "External id": 498145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721467.796, "dur": 2.474, + "args": { + "External id": 498146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721471.576, "dur": 0.982, + "args": { + "External id": 498147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140721474.007, "dur": 0.715, + "args": { + "External id": 498148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140721496.657, "dur": 29952.331, + "args": { + "External id": 498149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140721513.591, "dur": 29927.333, + "args": { + "External id": 498150,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140721535.584, "dur": 16.670, + "args": { + "External id": 498151,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140721556.109, "dur": 29849.916, + "args": { + "External id": 498152,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140721559.003, "dur": 29846.487, + "args": { + "External id": 498153,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140721567.537, "dur": 6.290, + "args": { + "External id": 498154,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140721575.644, "dur": 29826.397, + "args": { + "External id": 498155,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140751643.765, "dur": 38.300, + "args": { + "External id": 498156,"Sequence number": 5516017, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12336 + } + }, + { + "ph": "s", "id": 217, "pid": 2338711, "tid": 2338711, "ts": 6274140751643.765, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140751667.518, "dur": 9.457, + "args": { + "External id": 498157,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140751671.874, "dur": 4.730, + "args": { + "External id": 498158,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140751750.050, "dur": 71.726, + "args": { + "External id": 498159,"Record function id": 0, "Ev Idx": 12339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140751825.423, "dur": 1158.995, + "args": { + "External id": 498160,"Record function id": 0, "Ev Idx": 12340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140751865.070, "dur": 1101.595, + "args": { + "External id": 498161,"Sequence number": 5516018, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12341 + } + }, + { + "ph": "s", "id": 216, "pid": 2338711, "tid": 2338711, "ts": 6274140751865.070, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140751933.961, "dur": 51.601, + "args": { + "External id": 498162,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140751998.108, "dur": 135.334, + "args": { + "External id": 498163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140752154.154, "dur": 45.606, + "args": { + "External id": 498164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140752206.937, "dur": 30.254, + "args": { + "External id": 498165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140752269.224, "dur": 26.455, + "args": { + "External id": 498166,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140752313.989, "dur": 16.406, + "args": { + "External id": 498167,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140752355.944, "dur": 136.345, + "args": { + "External id": 498168,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140752412.313, "dur": 11.305, + "args": { + "External id": 498169,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140752417.784, "dur": 4.987, + "args": { + "External id": 498170,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140752426.127, "dur": 4.307, + "args": { + "External id": 498171,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140752432.158, "dur": 1.062, + "args": { + "External id": 498172,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140752437.596, "dur": 4.869, + "args": { + "External id": 498173,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140752502.997, "dur": 47.962, + "args": { + "External id": 498174,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140752599.746, "dur": 31.888, + "args": { + "External id": 498175,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140752643.702, "dur": 45.779, + "args": { + "External id": 498176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140752719.151, "dur": 35.943, + "args": { + "External id": 498177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140752786.061, "dur": 24.817, + "args": { + "External id": 498178,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140752815.809, "dur": 35.947, + "args": { + "External id": 498179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140752869.483, "dur": 20.022, + "args": { + "External id": 498180,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12360 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274140753086.156, "dur": 86.501, + "args": { + "External id": 498181,"Record function id": 0, "Ev Idx": 12361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140753250.351, "dur": 48.532, + "args": { + "External id": 498182,"Record function id": 0, "Ev Idx": 12362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274140753311.299, "dur": 29955.696, + "args": { + "External id": 498183,"Record function id": 0, "Ev Idx": 12363 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274140753323.138, "dur": 1063.929, + "args": { + "External id": 498184,"Record function id": 0, "Ev Idx": 12364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140753406.149, "dur": 9.599, + "args": { + "External id": 498185,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140753429.039, "dur": 44.227, + "args": { + "External id": 498186,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753436.995, "dur": 2.471, + "args": { + "External id": 498187,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753443.819, "dur": 0.421, + "args": { + "External id": 498188,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753445.163, "dur": 0.517, + "args": { + "External id": 498189,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753448.745, "dur": 0.727, + "args": { + "External id": 498190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753451.316, "dur": 0.403, + "args": { + "External id": 498191,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753454.710, "dur": 0.470, + "args": { + "External id": 498192,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753456.175, "dur": 4.998, + "args": { + "External id": 498193,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753461.803, "dur": 0.430, + "args": { + "External id": 498194,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753465.486, "dur": 0.341, + "args": { + "External id": 498195,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140753485.352, "dur": 59.049, + "args": { + "External id": 498196,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140753578.525, "dur": 171.301, + "args": { + "External id": 498197,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140753608.919, "dur": 5.389, + "args": { + "External id": 498198,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140753620.186, "dur": 23.696, + "args": { + "External id": 498199,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140753624.727, "dur": 18.736, + "args": { + "External id": 498200,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753640.216, "dur": 1.106, + "args": { + "External id": 498201,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140753652.290, "dur": 36.532, + "args": { + "External id": 498202,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753654.545, "dur": 3.251, + "args": { + "External id": 498203,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753660.828, "dur": 0.525, + "args": { + "External id": 498204,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753662.330, "dur": 0.264, + "args": { + "External id": 498205,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753668.339, "dur": 2.325, + "args": { + "External id": 498206,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753671.364, "dur": 0.381, + "args": { + "External id": 498207,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753672.577, "dur": 0.324, + "args": { + "External id": 498208,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753678.736, "dur": 0.332, + "args": { + "External id": 498209,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753679.508, "dur": 0.305, + "args": { + "External id": 498210,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140753680.732, "dur": 2.474, + "args": { + "External id": 498211,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140753702.160, "dur": 38.315, + "args": { + "External id": 498212,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140753805.665, "dur": 467.940, + "args": { + "External id": 498213,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140753843.046, "dur": 424.109, + "args": { + "External id": 498214,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12394, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140753853.040, "dur": 407.090, + "args": { + "External id": 498215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140754303.500, "dur": 2.514, + "args": { + "External id": 498216,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12396, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274140754410.553, "dur": 28630.794, + "args": { + "External id": 498217,"Record function id": 0, "Ev Idx": 12397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754513.852, "dur": 6.892, + "args": { + "External id": 498218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754524.475, "dur": 1.502, + "args": { + "External id": 498219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754527.523, "dur": 3.492, + "args": { + "External id": 498220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754532.710, "dur": 1.169, + "args": { + "External id": 498221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754535.337, "dur": 0.923, + "args": { + "External id": 498222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754537.334, "dur": 0.787, + "args": { + "External id": 498223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754542.341, "dur": 0.767, + "args": { + "External id": 498224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754546.614, "dur": 2.186, + "args": { + "External id": 498225,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754550.284, "dur": 0.745, + "args": { + "External id": 498226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140754552.431, "dur": 0.888, + "args": { + "External id": 498227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140754574.251, "dur": 28421.655, + "args": { + "External id": 498228,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140754608.969, "dur": 28378.974, + "args": { + "External id": 498229,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140754633.631, "dur": 17.995, + "args": { + "External id": 498230,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140754655.439, "dur": 28298.597, + "args": { + "External id": 498231,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140754658.357, "dur": 28294.945, + "args": { + "External id": 498232,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140754664.904, "dur": 8.295, + "args": { + "External id": 498233,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140754674.844, "dur": 28275.135, + "args": { + "External id": 498234,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140783207.874, "dur": 33.373, + "args": { + "External id": 498235,"Sequence number": 5516019, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12415 + } + }, + { + "ph": "s", "id": 215, "pid": 2338711, "tid": 2338711, "ts": 6274140783207.874, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140783224.729, "dur": 11.365, + "args": { + "External id": 498236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140783230.978, "dur": 4.719, + "args": { + "External id": 498237,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140783309.546, "dur": 73.097, + "args": { + "External id": 498238,"Record function id": 0, "Ev Idx": 12418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140783383.772, "dur": 1126.942, + "args": { + "External id": 498239,"Record function id": 0, "Ev Idx": 12419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140783422.247, "dur": 1073.827, + "args": { + "External id": 498240,"Sequence number": 5516020, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12420 + } + }, + { + "ph": "s", "id": 214, "pid": 2338711, "tid": 2338711, "ts": 6274140783422.247, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140783487.993, "dur": 48.966, + "args": { + "External id": 498241,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140783548.969, "dur": 111.596, + "args": { + "External id": 498242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140783676.689, "dur": 38.317, + "args": { + "External id": 498243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140783723.975, "dur": 30.776, + "args": { + "External id": 498244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140783783.508, "dur": 30.115, + "args": { + "External id": 498245,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140783835.515, "dur": 18.624, + "args": { + "External id": 498246,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140783877.486, "dur": 135.400, + "args": { + "External id": 498247,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140783927.208, "dur": 11.907, + "args": { + "External id": 498248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140783932.765, "dur": 5.530, + "args": { + "External id": 498249,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140783941.870, "dur": 4.536, + "args": { + "External id": 498250,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140783949.678, "dur": 0.913, + "args": { + "External id": 498251,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140783952.825, "dur": 6.639, + "args": { + "External id": 498252,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140784023.998, "dur": 93.101, + "args": { + "External id": 498253,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140784154.692, "dur": 31.340, + "args": { + "External id": 498254,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140784196.386, "dur": 45.433, + "args": { + "External id": 498255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140784250.295, "dur": 35.136, + "args": { + "External id": 498256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140784309.733, "dur": 26.217, + "args": { + "External id": 498257,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140784344.660, "dur": 34.764, + "args": { + "External id": 498258,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140784399.834, "dur": 18.860, + "args": { + "External id": 498259,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12439 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274140784601.753, "dur": 84.428, + "args": { + "External id": 498260,"Record function id": 0, "Ev Idx": 12440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140784763.995, "dur": 46.305, + "args": { + "External id": 498261,"Record function id": 0, "Ev Idx": 12441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274140784819.283, "dur": 32824.326, + "args": { + "External id": 498262,"Record function id": 0, "Ev Idx": 12442 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274140784828.726, "dur": 1025.227, + "args": { + "External id": 498263,"Record function id": 0, "Ev Idx": 12443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140784910.269, "dur": 9.705, + "args": { + "External id": 498264,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140784932.772, "dur": 42.838, + "args": { + "External id": 498265,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784940.386, "dur": 2.260, + "args": { + "External id": 498266,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784946.283, "dur": 0.413, + "args": { + "External id": 498267,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784949.256, "dur": 0.513, + "args": { + "External id": 498268,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784950.845, "dur": 0.707, + "args": { + "External id": 498269,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784954.672, "dur": 0.682, + "args": { + "External id": 498270,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784958.078, "dur": 0.913, + "args": { + "External id": 498271,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784959.691, "dur": 4.536, + "args": { + "External id": 498272,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784965.097, "dur": 0.413, + "args": { + "External id": 498273,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140784968.133, "dur": 0.532, + "args": { + "External id": 498274,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140784987.013, "dur": 53.636, + "args": { + "External id": 498275,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140785116.037, "dur": 142.404, + "args": { + "External id": 498276,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140785131.604, "dur": 5.894, + "args": { + "External id": 498277,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140785143.169, "dur": 11.198, + "args": { + "External id": 498278,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140785147.752, "dur": 6.237, + "args": { + "External id": 498279,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785151.383, "dur": 0.988, + "args": { + "External id": 498280,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140785161.427, "dur": 35.284, + "args": { + "External id": 498281,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785164.812, "dur": 2.575, + "args": { + "External id": 498282,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785168.279, "dur": 0.553, + "args": { + "External id": 498283,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785169.642, "dur": 0.492, + "args": { + "External id": 498284,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785176.209, "dur": 2.211, + "args": { + "External id": 498285,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785179.194, "dur": 0.398, + "args": { + "External id": 498286,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785180.273, "dur": 0.398, + "args": { + "External id": 498287,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785185.648, "dur": 0.567, + "args": { + "External id": 498288,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785186.882, "dur": 0.540, + "args": { + "External id": 498289,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140785189.357, "dur": 2.730, + "args": { + "External id": 498290,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140785212.778, "dur": 34.101, + "args": { + "External id": 498291,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140785319.121, "dur": 424.154, + "args": { + "External id": 498292,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140785353.362, "dur": 383.626, + "args": { + "External id": 498293,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12473, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140785363.679, "dur": 366.280, + "args": { + "External id": 498294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140785769.937, "dur": 2.345, + "args": { + "External id": 498295,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12475, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274140785878.841, "dur": 31558.063, + "args": { + "External id": 498296,"Record function id": 0, "Ev Idx": 12476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140785976.464, "dur": 6.763, + "args": { + "External id": 498297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140785986.581, "dur": 1.392, + "args": { + "External id": 498298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140785989.717, "dur": 3.243, + "args": { + "External id": 498299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140785994.897, "dur": 0.780, + "args": { + "External id": 498300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140785997.111, "dur": 1.148, + "args": { + "External id": 498301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140786004.997, "dur": 0.688, + "args": { + "External id": 498302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140786009.560, "dur": 0.754, + "args": { + "External id": 498303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140786011.770, "dur": 2.760, + "args": { + "External id": 498304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140786016.002, "dur": 0.827, + "args": { + "External id": 498305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140786020.333, "dur": 0.639, + "args": { + "External id": 498306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140786039.094, "dur": 31353.111, + "args": { + "External id": 498307,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140786102.197, "dur": 31281.957, + "args": { + "External id": 498308,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140786122.221, "dur": 17.436, + "args": { + "External id": 498309,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140786143.860, "dur": 31204.250, + "args": { + "External id": 498310,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140786146.921, "dur": 31200.657, + "args": { + "External id": 498311,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140786152.940, "dur": 5.496, + "args": { + "External id": 498312,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140786160.084, "dur": 31184.491, + "args": { + "External id": 498313,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140817572.090, "dur": 42.949, + "args": { + "External id": 498314,"Sequence number": 5516021, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12494 + } + }, + { + "ph": "s", "id": 213, "pid": 2338711, "tid": 2338711, "ts": 6274140817572.090, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140817599.225, "dur": 10.809, + "args": { + "External id": 498315,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140817604.890, "dur": 4.830, + "args": { + "External id": 498316,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140817687.041, "dur": 78.953, + "args": { + "External id": 498317,"Record function id": 0, "Ev Idx": 12497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140817767.093, "dur": 1143.264, + "args": { + "External id": 498318,"Record function id": 0, "Ev Idx": 12498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140817807.354, "dur": 1088.903, + "args": { + "External id": 498319,"Sequence number": 5516022, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12499 + } + }, + { + "ph": "s", "id": 212, "pid": 2338711, "tid": 2338711, "ts": 6274140817807.354, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140817878.353, "dur": 53.645, + "args": { + "External id": 498320,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140817944.639, "dur": 141.947, + "args": { + "External id": 498321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140818104.156, "dur": 44.022, + "args": { + "External id": 498322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140818158.915, "dur": 29.741, + "args": { + "External id": 498323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140818217.106, "dur": 33.229, + "args": { + "External id": 498324,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140818269.303, "dur": 16.323, + "args": { + "External id": 498325,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140818318.708, "dur": 134.512, + "args": { + "External id": 498326,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140818372.610, "dur": 11.253, + "args": { + "External id": 498327,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140818377.856, "dur": 5.156, + "args": { + "External id": 498328,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140818387.935, "dur": 5.103, + "args": { + "External id": 498329,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140818394.129, "dur": 1.190, + "args": { + "External id": 498330,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140818397.713, "dur": 4.162, + "args": { + "External id": 498331,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140818464.238, "dur": 47.415, + "args": { + "External id": 498332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140818544.596, "dur": 29.675, + "args": { + "External id": 498333,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140818599.336, "dur": 47.678, + "args": { + "External id": 498334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140818657.547, "dur": 35.079, + "args": { + "External id": 498335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140818716.076, "dur": 26.095, + "args": { + "External id": 498336,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140818750.115, "dur": 35.043, + "args": { + "External id": 498337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140818804.259, "dur": 18.882, + "args": { + "External id": 498338,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12518 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274140818979.158, "dur": 121.571, + "args": { + "External id": 498339,"Record function id": 0, "Ev Idx": 12519 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140819188.719, "dur": 49.971, + "args": { + "External id": 498340,"Record function id": 0, "Ev Idx": 12520 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274140819247.959, "dur": 32045.744, + "args": { + "External id": 498341,"Record function id": 0, "Ev Idx": 12521 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274140819258.845, "dur": 1059.662, + "args": { + "External id": 498342,"Record function id": 0, "Ev Idx": 12522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140819340.263, "dur": 9.760, + "args": { + "External id": 498343,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140819362.757, "dur": 41.336, + "args": { + "External id": 498344,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819368.325, "dur": 2.489, + "args": { + "External id": 498345,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819375.108, "dur": 0.542, + "args": { + "External id": 498346,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819378.738, "dur": 0.815, + "args": { + "External id": 498347,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819380.141, "dur": 0.658, + "args": { + "External id": 498348,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819385.960, "dur": 0.359, + "args": { + "External id": 498349,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819386.820, "dur": 0.694, + "args": { + "External id": 498350,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819388.054, "dur": 5.001, + "args": { + "External id": 498351,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819395.203, "dur": 0.363, + "args": { + "External id": 498352,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819396.370, "dur": 0.345, + "args": { + "External id": 498353,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140819414.817, "dur": 57.295, + "args": { + "External id": 498354,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140819507.055, "dur": 149.957, + "args": { + "External id": 498355,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140819518.715, "dur": 4.255, + "args": { + "External id": 498356,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140819528.345, "dur": 10.249, + "args": { + "External id": 498357,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140819532.933, "dur": 5.233, + "args": { + "External id": 498358,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819536.175, "dur": 0.686, + "args": { + "External id": 498359,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140819545.045, "dur": 33.953, + "args": { + "External id": 498360,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819548.513, "dur": 2.655, + "args": { + "External id": 498361,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819551.890, "dur": 0.540, + "args": { + "External id": 498362,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819555.458, "dur": 0.346, + "args": { + "External id": 498363,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819558.976, "dur": 2.528, + "args": { + "External id": 498364,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819562.310, "dur": 0.260, + "args": { + "External id": 498365,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819565.215, "dur": 0.581, + "args": { + "External id": 498366,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819568.556, "dur": 0.260, + "args": { + "External id": 498367,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819569.559, "dur": 0.349, + "args": { + "External id": 498368,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140819572.163, "dur": 2.000, + "args": { + "External id": 498369,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140819608.909, "dur": 39.156, + "args": { + "External id": 498370,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140819716.977, "dur": 489.212, + "args": { + "External id": 498371,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140819754.771, "dur": 444.530, + "args": { + "External id": 498372,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12552, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140819764.652, "dur": 424.381, + "args": { + "External id": 498373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140820235.875, "dur": 2.554, + "args": { + "External id": 498374,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12554, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274140820339.947, "dur": 30758.287, + "args": { + "External id": 498375,"Record function id": 0, "Ev Idx": 12555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820442.097, "dur": 6.911, + "args": { + "External id": 498376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820452.926, "dur": 1.110, + "args": { + "External id": 498377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820455.720, "dur": 3.789, + "args": { + "External id": 498378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820461.330, "dur": 0.810, + "args": { + "External id": 498379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820463.490, "dur": 0.912, + "args": { + "External id": 498380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820465.656, "dur": 0.805, + "args": { + "External id": 498381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820470.197, "dur": 0.936, + "args": { + "External id": 498382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820472.551, "dur": 2.128, + "args": { + "External id": 498383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820476.250, "dur": 0.635, + "args": { + "External id": 498384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140820478.244, "dur": 0.918, + "args": { + "External id": 498385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140820501.277, "dur": 30527.838, + "args": { + "External id": 498386,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140820516.996, "dur": 30504.302, + "args": { + "External id": 498387,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140820535.979, "dur": 15.734, + "args": { + "External id": 498388,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140820555.381, "dur": 30429.607, + "args": { + "External id": 498389,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140820558.140, "dur": 30426.234, + "args": { + "External id": 498390,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140820565.776, "dur": 7.164, + "args": { + "External id": 498391,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140820574.756, "dur": 30406.583, + "args": { + "External id": 498392,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140851238.885, "dur": 30.155, + "args": { + "External id": 498393,"Sequence number": 5516023, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12573 + } + }, + { + "ph": "s", "id": 211, "pid": 2338711, "tid": 2338711, "ts": 6274140851238.885, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140851255.223, "dur": 8.717, + "args": { + "External id": 498394,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140851258.703, "dur": 4.978, + "args": { + "External id": 498395,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140851333.330, "dur": 71.058, + "args": { + "External id": 498396,"Record function id": 0, "Ev Idx": 12576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140851405.932, "dur": 1148.415, + "args": { + "External id": 498397,"Record function id": 0, "Ev Idx": 12577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140851447.572, "dur": 1092.300, + "args": { + "External id": 498398,"Sequence number": 5516024, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12578 + } + }, + { + "ph": "s", "id": 210, "pid": 2338711, "tid": 2338711, "ts": 6274140851447.572, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140851512.997, "dur": 50.095, + "args": { + "External id": 498399,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140851575.274, "dur": 121.114, + "args": { + "External id": 498400,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140851711.740, "dur": 37.895, + "args": { + "External id": 498401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140851758.810, "dur": 33.883, + "args": { + "External id": 498402,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140851824.214, "dur": 26.823, + "args": { + "External id": 498403,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140851871.715, "dur": 20.889, + "args": { + "External id": 498404,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140851919.380, "dur": 175.447, + "args": { + "External id": 498405,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140851974.872, "dur": 13.352, + "args": { + "External id": 498406,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140851982.242, "dur": 5.033, + "args": { + "External id": 498407,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140851990.849, "dur": 4.102, + "args": { + "External id": 498408,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140851996.114, "dur": 0.948, + "args": { + "External id": 498409,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140851999.569, "dur": 5.964, + "args": { + "External id": 498410,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140852108.409, "dur": 56.726, + "args": { + "External id": 498411,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140852203.637, "dur": 35.645, + "args": { + "External id": 498412,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140852249.193, "dur": 41.574, + "args": { + "External id": 498413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140852298.258, "dur": 34.543, + "args": { + "External id": 498414,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140852353.264, "dur": 24.675, + "args": { + "External id": 498415,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140852386.059, "dur": 37.106, + "args": { + "External id": 498416,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140852443.482, "dur": 21.116, + "args": { + "External id": 498417,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274140852638.639, "dur": 83.883, + "args": { + "External id": 498418,"Record function id": 0, "Ev Idx": 12598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140852798.809, "dur": 47.462, + "args": { + "External id": 498419,"Record function id": 0, "Ev Idx": 12599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274140852855.269, "dur": 30494.621, + "args": { + "External id": 498420,"Record function id": 0, "Ev Idx": 12600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274140852865.797, "dur": 1037.401, + "args": { + "External id": 498421,"Record function id": 0, "Ev Idx": 12601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140852950.407, "dur": 9.231, + "args": { + "External id": 498422,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140852972.546, "dur": 45.117, + "args": { + "External id": 498423,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140852982.754, "dur": 2.384, + "args": { + "External id": 498424,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140852990.997, "dur": 0.646, + "args": { + "External id": 498425,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140852992.796, "dur": 0.378, + "args": { + "External id": 498426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140852994.144, "dur": 0.461, + "args": { + "External id": 498427,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140852999.214, "dur": 0.656, + "args": { + "External id": 498428,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853000.675, "dur": 0.685, + "args": { + "External id": 498429,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853003.934, "dur": 4.426, + "args": { + "External id": 498430,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853009.325, "dur": 0.509, + "args": { + "External id": 498431,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853010.325, "dur": 0.400, + "args": { + "External id": 498432,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140853030.556, "dur": 93.531, + "args": { + "External id": 498433,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140853163.044, "dur": 136.037, + "args": { + "External id": 498434,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140853176.343, "dur": 5.451, + "args": { + "External id": 498435,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140853187.061, "dur": 10.967, + "args": { + "External id": 498436,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140853191.789, "dur": 5.803, + "args": { + "External id": 498437,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853195.289, "dur": 0.830, + "args": { + "External id": 498438,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140853206.078, "dur": 32.934, + "args": { + "External id": 498439,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853208.147, "dur": 2.723, + "args": { + "External id": 498440,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853211.967, "dur": 0.684, + "args": { + "External id": 498441,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853215.320, "dur": 0.450, + "args": { + "External id": 498442,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853219.039, "dur": 2.393, + "args": { + "External id": 498443,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853223.571, "dur": 0.429, + "args": { + "External id": 498444,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853224.819, "dur": 0.492, + "args": { + "External id": 498445,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853227.659, "dur": 0.421, + "args": { + "External id": 498446,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853230.992, "dur": 0.523, + "args": { + "External id": 498447,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140853232.040, "dur": 2.246, + "args": { + "External id": 498448,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140853254.814, "dur": 36.083, + "args": { + "External id": 498449,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140853355.639, "dur": 437.715, + "args": { + "External id": 498450,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140853389.817, "dur": 397.531, + "args": { + "External id": 498451,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12631, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140853402.170, "dur": 378.665, + "args": { + "External id": 498452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140853821.679, "dur": 2.344, + "args": { + "External id": 498453,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12633, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274140853925.307, "dur": 29221.751, + "args": { + "External id": 498454,"Record function id": 0, "Ev Idx": 12634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854039.291, "dur": 7.039, + "args": { + "External id": 498455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854049.726, "dur": 0.978, + "args": { + "External id": 498456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854086.111, "dur": 3.888, + "args": { + "External id": 498457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854095.000, "dur": 1.110, + "args": { + "External id": 498458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854097.473, "dur": 0.909, + "args": { + "External id": 498459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854099.613, "dur": 0.687, + "args": { + "External id": 498460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854101.736, "dur": 0.737, + "args": { + "External id": 498461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854107.246, "dur": 2.258, + "args": { + "External id": 498462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854110.863, "dur": 0.849, + "args": { + "External id": 498463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140854112.975, "dur": 0.446, + "args": { + "External id": 498464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140854134.971, "dur": 28966.449, + "args": { + "External id": 498465,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140854152.864, "dur": 28939.783, + "args": { + "External id": 498466,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140854171.830, "dur": 16.663, + "args": { + "External id": 498467,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140854192.116, "dur": 28834.026, + "args": { + "External id": 498468,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140854194.823, "dur": 28830.788, + "args": { + "External id": 498469,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140854200.405, "dur": 7.542, + "args": { + "External id": 498470,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140854209.698, "dur": 28812.410, + "args": { + "External id": 498471,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140883287.778, "dur": 36.614, + "args": { + "External id": 498472,"Sequence number": 5516025, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12652 + } + }, + { + "ph": "s", "id": 209, "pid": 2338711, "tid": 2338711, "ts": 6274140883287.778, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140883305.666, "dur": 13.861, + "args": { + "External id": 498473,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140883314.842, "dur": 4.428, + "args": { + "External id": 498474,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140883390.859, "dur": 75.196, + "args": { + "External id": 498475,"Record function id": 0, "Ev Idx": 12655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140883467.435, "dur": 1166.264, + "args": { + "External id": 498476,"Record function id": 0, "Ev Idx": 12656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140883506.964, "dur": 1111.561, + "args": { + "External id": 498477,"Sequence number": 5516026, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12657 + } + }, + { + "ph": "s", "id": 208, "pid": 2338711, "tid": 2338711, "ts": 6274140883506.964, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140883577.836, "dur": 63.814, + "args": { + "External id": 498478,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140883657.602, "dur": 103.974, + "args": { + "External id": 498479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140883774.418, "dur": 39.175, + "args": { + "External id": 498480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140883822.524, "dur": 29.055, + "args": { + "External id": 498481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140883878.732, "dur": 28.683, + "args": { + "External id": 498482,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140883935.925, "dur": 17.577, + "args": { + "External id": 498483,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140883977.572, "dur": 199.514, + "args": { + "External id": 498484,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140884030.338, "dur": 11.050, + "args": { + "External id": 498485,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140884035.348, "dur": 5.196, + "args": { + "External id": 498486,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140884047.106, "dur": 3.577, + "args": { + "External id": 498487,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140884093.420, "dur": 2.474, + "args": { + "External id": 498488,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140884105.304, "dur": 8.055, + "args": { + "External id": 498489,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140884189.258, "dur": 55.960, + "args": { + "External id": 498490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140884277.555, "dur": 31.275, + "args": { + "External id": 498491,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140884320.965, "dur": 42.124, + "args": { + "External id": 498492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140884369.095, "dur": 34.672, + "args": { + "External id": 498493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140884424.574, "dur": 28.087, + "args": { + "External id": 498494,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140884457.809, "dur": 34.319, + "args": { + "External id": 498495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140884512.893, "dur": 17.893, + "args": { + "External id": 498496,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12676 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274140884702.435, "dur": 79.542, + "args": { + "External id": 498497,"Record function id": 0, "Ev Idx": 12677 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140884855.666, "dur": 45.894, + "args": { + "External id": 498498,"Record function id": 0, "Ev Idx": 12678 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274140884913.642, "dur": 31511.023, + "args": { + "External id": 498499,"Record function id": 0, "Ev Idx": 12679 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274140884922.232, "dur": 1012.831, + "args": { + "External id": 498500,"Record function id": 0, "Ev Idx": 12680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140885006.012, "dur": 8.851, + "args": { + "External id": 498501,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140885029.520, "dur": 79.102, + "args": { + "External id": 498502,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885035.580, "dur": 2.399, + "args": { + "External id": 498503,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885042.773, "dur": 0.661, + "args": { + "External id": 498504,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885045.151, "dur": 0.565, + "args": { + "External id": 498505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885047.671, "dur": 0.676, + "args": { + "External id": 498506,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885052.014, "dur": 0.276, + "args": { + "External id": 498507,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885089.882, "dur": 0.799, + "args": { + "External id": 498508,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885092.226, "dur": 5.243, + "args": { + "External id": 498509,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885099.048, "dur": 0.471, + "args": { + "External id": 498510,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885100.869, "dur": 0.503, + "args": { + "External id": 498511,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140885121.934, "dur": 56.129, + "args": { + "External id": 498512,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140885216.023, "dur": 130.567, + "args": { + "External id": 498513,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140885228.895, "dur": 5.465, + "args": { + "External id": 498514,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140885239.623, "dur": 13.751, + "args": { + "External id": 498515,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140885246.479, "dur": 6.469, + "args": { + "External id": 498516,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885250.799, "dur": 0.860, + "args": { + "External id": 498517,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140885261.127, "dur": 35.562, + "args": { + "External id": 498518,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885263.542, "dur": 2.725, + "args": { + "External id": 498519,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885268.109, "dur": 0.409, + "args": { + "External id": 498520,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885270.299, "dur": 0.407, + "args": { + "External id": 498521,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885274.765, "dur": 2.594, + "args": { + "External id": 498522,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885278.831, "dur": 0.333, + "args": { + "External id": 498523,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885280.321, "dur": 0.273, + "args": { + "External id": 498524,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885285.463, "dur": 0.461, + "args": { + "External id": 498525,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885287.423, "dur": 0.652, + "args": { + "External id": 498526,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140885289.669, "dur": 2.525, + "args": { + "External id": 498527,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140885307.380, "dur": 30.850, + "args": { + "External id": 498528,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140885403.899, "dur": 426.461, + "args": { + "External id": 498529,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140885437.433, "dur": 386.811, + "args": { + "External id": 498530,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12710, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140885447.807, "dur": 370.523, + "args": { + "External id": 498531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140885856.928, "dur": 2.820, + "args": { + "External id": 498532,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12712, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274140885956.829, "dur": 30270.567, + "args": { + "External id": 498533,"Record function id": 0, "Ev Idx": 12713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886091.164, "dur": 7.366, + "args": { + "External id": 498534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886104.846, "dur": 1.060, + "args": { + "External id": 498535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886107.821, "dur": 3.153, + "args": { + "External id": 498536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886113.364, "dur": 0.984, + "args": { + "External id": 498537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886116.062, "dur": 0.874, + "args": { + "External id": 498538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886118.442, "dur": 1.015, + "args": { + "External id": 498539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886123.442, "dur": 0.928, + "args": { + "External id": 498540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886126.018, "dur": 1.896, + "args": { + "External id": 498541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886129.469, "dur": 0.708, + "args": { + "External id": 498542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140886131.699, "dur": 0.697, + "args": { + "External id": 498543,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140886166.633, "dur": 30013.164, + "args": { + "External id": 498544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140886184.242, "dur": 29987.337, + "args": { + "External id": 498545,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140886203.156, "dur": 17.682, + "args": { + "External id": 498546,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140886224.382, "dur": 29913.387, + "args": { + "External id": 498547,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140886226.815, "dur": 29910.216, + "args": { + "External id": 498548,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140886233.012, "dur": 5.378, + "args": { + "External id": 498549,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140886240.113, "dur": 29893.511, + "args": { + "External id": 498550,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140916368.435, "dur": 30.250, + "args": { + "External id": 498551,"Sequence number": 5516027, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12731 + } + }, + { + "ph": "s", "id": 207, "pid": 2338711, "tid": 2338711, "ts": 6274140916368.435, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140916384.858, "dur": 8.898, + "args": { + "External id": 498552,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140916389.028, "dur": 4.534, + "args": { + "External id": 498553,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140916465.362, "dur": 73.274, + "args": { + "External id": 498554,"Record function id": 0, "Ev Idx": 12734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140916540.171, "dur": 1177.998, + "args": { + "External id": 498555,"Record function id": 0, "Ev Idx": 12735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140916579.762, "dur": 1124.111, + "args": { + "External id": 498556,"Sequence number": 5516028, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12736 + } + }, + { + "ph": "s", "id": 206, "pid": 2338711, "tid": 2338711, "ts": 6274140916579.762, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140916663.834, "dur": 50.609, + "args": { + "External id": 498557,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140916727.709, "dur": 105.781, + "args": { + "External id": 498558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140916847.730, "dur": 44.221, + "args": { + "External id": 498559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140916900.794, "dur": 30.783, + "args": { + "External id": 498560,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140916960.096, "dur": 27.649, + "args": { + "External id": 498561,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140917012.372, "dur": 19.098, + "args": { + "External id": 498562,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140917093.190, "dur": 143.840, + "args": { + "External id": 498563,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140917151.820, "dur": 12.681, + "args": { + "External id": 498564,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140917157.483, "dur": 6.031, + "args": { + "External id": 498565,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140917167.374, "dur": 3.596, + "args": { + "External id": 498566,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140917172.365, "dur": 1.327, + "args": { + "External id": 498567,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140917176.203, "dur": 4.871, + "args": { + "External id": 498568,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140917248.869, "dur": 55.131, + "args": { + "External id": 498569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140917336.213, "dur": 31.915, + "args": { + "External id": 498570,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140917378.164, "dur": 44.176, + "args": { + "External id": 498571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140917433.759, "dur": 35.487, + "args": { + "External id": 498572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140917496.161, "dur": 25.818, + "args": { + "External id": 498573,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140917529.572, "dur": 38.676, + "args": { + "External id": 498574,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140917601.042, "dur": 21.757, + "args": { + "External id": 498575,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12755 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274140917788.111, "dur": 81.901, + "args": { + "External id": 498576,"Record function id": 0, "Ev Idx": 12756 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140917948.484, "dur": 52.276, + "args": { + "External id": 498577,"Record function id": 0, "Ev Idx": 12757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274140918009.438, "dur": 30912.343, + "args": { + "External id": 498578,"Record function id": 0, "Ev Idx": 12758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274140918016.669, "dur": 1117.048, + "args": { + "External id": 498579,"Record function id": 0, "Ev Idx": 12759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140918143.159, "dur": 10.627, + "args": { + "External id": 498580,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140918168.191, "dur": 41.882, + "args": { + "External id": 498581,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918174.523, "dur": 2.701, + "args": { + "External id": 498582,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918181.999, "dur": 0.492, + "args": { + "External id": 498583,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918183.867, "dur": 0.526, + "args": { + "External id": 498584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918186.486, "dur": 0.354, + "args": { + "External id": 498585,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918190.922, "dur": 0.354, + "args": { + "External id": 498586,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918192.971, "dur": 0.311, + "args": { + "External id": 498587,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918194.920, "dur": 4.374, + "args": { + "External id": 498588,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918200.668, "dur": 0.609, + "args": { + "External id": 498589,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918202.991, "dur": 0.495, + "args": { + "External id": 498590,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140918221.643, "dur": 56.828, + "args": { + "External id": 498591,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140918315.410, "dur": 139.772, + "args": { + "External id": 498592,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140918328.026, "dur": 4.112, + "args": { + "External id": 498593,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140918337.401, "dur": 13.873, + "args": { + "External id": 498594,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140918344.460, "dur": 6.360, + "args": { + "External id": 498595,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918348.938, "dur": 0.676, + "args": { + "External id": 498596,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140918358.320, "dur": 35.678, + "args": { + "External id": 498597,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918360.878, "dur": 2.679, + "args": { + "External id": 498598,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918368.152, "dur": 0.542, + "args": { + "External id": 498599,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918370.327, "dur": 0.487, + "args": { + "External id": 498600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918373.981, "dur": 2.922, + "args": { + "External id": 498601,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918378.453, "dur": 0.510, + "args": { + "External id": 498602,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918380.440, "dur": 2.061, + "args": { + "External id": 498603,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918383.775, "dur": 0.536, + "args": { + "External id": 498604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918385.762, "dur": 0.556, + "args": { + "External id": 498605,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140918390.082, "dur": 0.306, + "args": { + "External id": 498606,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140918414.328, "dur": 32.840, + "args": { + "External id": 498607,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140918511.448, "dur": 473.162, + "args": { + "External id": 498608,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140918545.415, "dur": 432.640, + "args": { + "External id": 498609,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12789, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140918558.101, "dur": 412.513, + "args": { + "External id": 498610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140919013.252, "dur": 2.446, + "args": { + "External id": 498611,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12791, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274140919158.156, "dur": 29560.092, + "args": { + "External id": 498612,"Record function id": 0, "Ev Idx": 12792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919270.289, "dur": 7.178, + "args": { + "External id": 498613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919281.402, "dur": 1.194, + "args": { + "External id": 498614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919284.415, "dur": 3.340, + "args": { + "External id": 498615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919289.580, "dur": 0.702, + "args": { + "External id": 498616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919291.828, "dur": 0.958, + "args": { + "External id": 498617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919294.373, "dur": 0.802, + "args": { + "External id": 498618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919298.725, "dur": 1.217, + "args": { + "External id": 498619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919301.483, "dur": 2.239, + "args": { + "External id": 498620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919305.146, "dur": 0.773, + "args": { + "External id": 498621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140919307.209, "dur": 0.701, + "args": { + "External id": 498622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140919328.862, "dur": 29345.303, + "args": { + "External id": 498623,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140919345.468, "dur": 29320.758, + "args": { + "External id": 498624,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140919363.522, "dur": 17.950, + "args": { + "External id": 498625,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140919385.048, "dur": 29247.286, + "args": { + "External id": 498626,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140919388.201, "dur": 29243.552, + "args": { + "External id": 498627,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140919394.714, "dur": 6.491, + "args": { + "External id": 498628,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140919402.929, "dur": 29225.621, + "args": { + "External id": 498629,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140948864.395, "dur": 32.593, + "args": { + "External id": 498630,"Sequence number": 5516029, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12810 + } + }, + { + "ph": "s", "id": 205, "pid": 2338711, "tid": 2338711, "ts": 6274140948864.395, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140948880.217, "dur": 8.354, + "args": { + "External id": 498631,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140948884.376, "dur": 4.016, + "args": { + "External id": 498632,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140948967.311, "dur": 75.641, + "args": { + "External id": 498633,"Record function id": 0, "Ev Idx": 12813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140949044.542, "dur": 1177.840, + "args": { + "External id": 498634,"Record function id": 0, "Ev Idx": 12814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140949122.331, "dur": 1086.182, + "args": { + "External id": 498635,"Sequence number": 5516030, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12815 + } + }, + { + "ph": "s", "id": 204, "pid": 2338711, "tid": 2338711, "ts": 6274140949122.331, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140949188.819, "dur": 52.636, + "args": { + "External id": 498636,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140949253.870, "dur": 91.213, + "args": { + "External id": 498637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140949358.023, "dur": 37.894, + "args": { + "External id": 498638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140949404.607, "dur": 38.633, + "args": { + "External id": 498639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140949469.593, "dur": 32.640, + "args": { + "External id": 498640,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140949522.185, "dur": 18.692, + "args": { + "External id": 498641,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140949564.992, "dur": 158.418, + "args": { + "External id": 498642,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140949635.405, "dur": 13.220, + "args": { + "External id": 498643,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140949641.406, "dur": 6.241, + "args": { + "External id": 498644,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140949651.591, "dur": 4.507, + "args": { + "External id": 498645,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140949657.550, "dur": 1.307, + "args": { + "External id": 498646,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140949661.358, "dur": 5.747, + "args": { + "External id": 498647,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140949735.561, "dur": 54.322, + "args": { + "External id": 498648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140949822.339, "dur": 31.182, + "args": { + "External id": 498649,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140949870.273, "dur": 43.422, + "args": { + "External id": 498650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140949922.823, "dur": 34.751, + "args": { + "External id": 498651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140949982.540, "dur": 26.895, + "args": { + "External id": 498652,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140950017.044, "dur": 69.420, + "args": { + "External id": 498653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140950110.524, "dur": 23.884, + "args": { + "External id": 498654,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12834 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274140950292.817, "dur": 87.639, + "args": { + "External id": 498655,"Record function id": 0, "Ev Idx": 12835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140950454.103, "dur": 45.464, + "args": { + "External id": 498656,"Record function id": 0, "Ev Idx": 12836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274140950508.709, "dur": 32746.238, + "args": { + "External id": 498657,"Record function id": 0, "Ev Idx": 12837 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274140950516.362, "dur": 1001.708, + "args": { + "External id": 498658,"Record function id": 0, "Ev Idx": 12838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140950614.695, "dur": 10.146, + "args": { + "External id": 498659,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140950638.436, "dur": 39.643, + "args": { + "External id": 498660,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950644.888, "dur": 2.226, + "args": { + "External id": 498661,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950651.601, "dur": 0.371, + "args": { + "External id": 498662,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950653.793, "dur": 0.657, + "args": { + "External id": 498663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950656.147, "dur": 0.519, + "args": { + "External id": 498664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950660.020, "dur": 0.532, + "args": { + "External id": 498665,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950661.868, "dur": 0.539, + "args": { + "External id": 498666,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950664.007, "dur": 4.026, + "args": { + "External id": 498667,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950669.399, "dur": 0.292, + "args": { + "External id": 498668,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950671.208, "dur": 0.441, + "args": { + "External id": 498669,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140950690.545, "dur": 53.655, + "args": { + "External id": 498670,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140950782.941, "dur": 124.780, + "args": { + "External id": 498671,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140950795.421, "dur": 4.599, + "args": { + "External id": 498672,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140950805.166, "dur": 13.358, + "args": { + "External id": 498673,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140950812.150, "dur": 5.902, + "args": { + "External id": 498674,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950816.334, "dur": 0.551, + "args": { + "External id": 498675,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140950825.502, "dur": 30.587, + "args": { + "External id": 498676,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950828.309, "dur": 2.853, + "args": { + "External id": 498677,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950832.349, "dur": 0.419, + "args": { + "External id": 498678,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950834.246, "dur": 0.430, + "args": { + "External id": 498679,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950838.772, "dur": 2.581, + "args": { + "External id": 498680,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950842.988, "dur": 0.333, + "args": { + "External id": 498681,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950844.508, "dur": 0.393, + "args": { + "External id": 498682,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950846.740, "dur": 0.325, + "args": { + "External id": 498683,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950848.292, "dur": 0.278, + "args": { + "External id": 498684,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140950849.684, "dur": 2.532, + "args": { + "External id": 498685,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140950867.347, "dur": 32.587, + "args": { + "External id": 498686,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140950965.569, "dur": 442.190, + "args": { + "External id": 498687,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140950999.713, "dur": 401.884, + "args": { + "External id": 498688,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12868, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140951034.458, "dur": 361.205, + "args": { + "External id": 498689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140951435.217, "dur": 2.316, + "args": { + "External id": 498690,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12870, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274140951540.766, "dur": 31482.718, + "args": { + "External id": 498691,"Record function id": 0, "Ev Idx": 12871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951662.263, "dur": 7.332, + "args": { + "External id": 498692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951674.333, "dur": 1.217, + "args": { + "External id": 498693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951677.663, "dur": 3.765, + "args": { + "External id": 498694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951683.459, "dur": 0.826, + "args": { + "External id": 498695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951685.861, "dur": 0.991, + "args": { + "External id": 498696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951688.225, "dur": 0.917, + "args": { + "External id": 498697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951693.306, "dur": 1.062, + "args": { + "External id": 498698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951695.910, "dur": 2.544, + "args": { + "External id": 498699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951700.159, "dur": 0.942, + "args": { + "External id": 498700,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140951702.673, "dur": 0.842, + "args": { + "External id": 498701,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140951725.283, "dur": 31250.940, + "args": { + "External id": 498702,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140951742.313, "dur": 31225.701, + "args": { + "External id": 498703,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140951758.769, "dur": 16.153, + "args": { + "External id": 498704,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140951778.457, "dur": 31154.844, + "args": { + "External id": 498705,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140951781.285, "dur": 31151.411, + "args": { + "External id": 498706,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140951787.646, "dur": 6.668, + "args": { + "External id": 498707,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140951796.109, "dur": 31133.521, + "args": { + "External id": 498708,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140983197.052, "dur": 30.505, + "args": { + "External id": 498709,"Sequence number": 5516031, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12889 + } + }, + { + "ph": "s", "id": 203, "pid": 2338711, "tid": 2338711, "ts": 6274140983197.052, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274140983213.287, "dur": 9.216, + "args": { + "External id": 498710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140983217.520, "dur": 4.608, + "args": { + "External id": 498711,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274140983297.850, "dur": 72.199, + "args": { + "External id": 498712,"Record function id": 0, "Ev Idx": 12892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274140983371.433, "dur": 1128.391, + "args": { + "External id": 498713,"Record function id": 0, "Ev Idx": 12893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274140983411.232, "dur": 1073.777, + "args": { + "External id": 498714,"Sequence number": 5516032, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12894 + } + }, + { + "ph": "s", "id": 202, "pid": 2338711, "tid": 2338711, "ts": 6274140983411.232, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140983478.116, "dur": 49.921, + "args": { + "External id": 498715,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140983540.510, "dur": 121.641, + "args": { + "External id": 498716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140983678.748, "dur": 39.762, + "args": { + "External id": 498717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140983727.654, "dur": 29.612, + "args": { + "External id": 498718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140983784.844, "dur": 26.708, + "args": { + "External id": 498719,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274140983831.671, "dur": 17.572, + "args": { + "External id": 498720,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274140983872.448, "dur": 134.176, + "args": { + "External id": 498721,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274140983922.012, "dur": 11.637, + "args": { + "External id": 498722,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140983927.561, "dur": 5.250, + "args": { + "External id": 498723,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140983936.756, "dur": 5.384, + "args": { + "External id": 498724,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140983943.569, "dur": 1.046, + "args": { + "External id": 498725,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140983947.076, "dur": 4.929, + "args": { + "External id": 498726,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140984017.607, "dur": 84.619, + "args": { + "External id": 498727,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274140984141.963, "dur": 33.980, + "args": { + "External id": 498728,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140984187.095, "dur": 46.802, + "args": { + "External id": 498729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140984242.474, "dur": 35.152, + "args": { + "External id": 498730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274140984300.461, "dur": 27.702, + "args": { + "External id": 498731,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274140984336.456, "dur": 35.523, + "args": { + "External id": 498732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274140984391.303, "dur": 17.821, + "args": { + "External id": 498733,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12913 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274140984567.390, "dur": 100.768, + "args": { + "External id": 498734,"Record function id": 0, "Ev Idx": 12914 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274140984753.940, "dur": 47.858, + "args": { + "External id": 498735,"Record function id": 0, "Ev Idx": 12915 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274140984813.742, "dur": 31634.468, + "args": { + "External id": 498736,"Record function id": 0, "Ev Idx": 12916 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274140984821.262, "dur": 983.239, + "args": { + "External id": 498737,"Record function id": 0, "Ev Idx": 12917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140984902.581, "dur": 9.667, + "args": { + "External id": 498738,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140984925.414, "dur": 38.460, + "args": { + "External id": 498739,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984931.218, "dur": 2.658, + "args": { + "External id": 498740,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984938.192, "dur": 0.522, + "args": { + "External id": 498741,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984940.214, "dur": 0.506, + "args": { + "External id": 498742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984942.141, "dur": 0.400, + "args": { + "External id": 498743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984945.741, "dur": 0.548, + "args": { + "External id": 498744,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984947.985, "dur": 0.400, + "args": { + "External id": 498745,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984949.718, "dur": 4.423, + "args": { + "External id": 498746,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984955.214, "dur": 0.669, + "args": { + "External id": 498747,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140984957.412, "dur": 0.442, + "args": { + "External id": 498748,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140984978.956, "dur": 56.816, + "args": { + "External id": 498749,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274140985108.034, "dur": 142.123, + "args": { + "External id": 498750,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140985122.348, "dur": 8.374, + "args": { + "External id": 498751,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274140985136.696, "dur": 14.233, + "args": { + "External id": 498752,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274140985143.483, "dur": 6.910, + "args": { + "External id": 498753,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985147.856, "dur": 0.710, + "args": { + "External id": 498754,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274140985159.030, "dur": 30.869, + "args": { + "External id": 498755,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985161.453, "dur": 0.562, + "args": { + "External id": 498756,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985163.248, "dur": 2.621, + "args": { + "External id": 498757,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985167.314, "dur": 0.536, + "args": { + "External id": 498758,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985169.121, "dur": 3.372, + "args": { + "External id": 498759,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985175.595, "dur": 0.348, + "args": { + "External id": 498760,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985177.355, "dur": 0.552, + "args": { + "External id": 498761,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985179.533, "dur": 0.430, + "args": { + "External id": 498762,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985183.181, "dur": 0.561, + "args": { + "External id": 498763,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140985185.121, "dur": 0.537, + "args": { + "External id": 498764,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140985204.130, "dur": 36.787, + "args": { + "External id": 498765,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274140985307.810, "dur": 394.298, + "args": { + "External id": 498766,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140985340.894, "dur": 355.109, + "args": { + "External id": 498767,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12947, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274140985350.894, "dur": 339.237, + "args": { + "External id": 498768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274140985727.878, "dur": 2.464, + "args": { + "External id": 498769,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12949, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274140985826.249, "dur": 30410.556, + "args": { + "External id": 498770,"Record function id": 0, "Ev Idx": 12950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985923.553, "dur": 6.560, + "args": { + "External id": 498771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985934.392, "dur": 1.007, + "args": { + "External id": 498772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985937.062, "dur": 3.334, + "args": { + "External id": 498773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985942.395, "dur": 0.792, + "args": { + "External id": 498774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985944.476, "dur": 0.896, + "args": { + "External id": 498775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985946.926, "dur": 0.747, + "args": { + "External id": 498776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985951.366, "dur": 0.660, + "args": { + "External id": 498777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985953.486, "dur": 2.276, + "args": { + "External id": 498778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985957.431, "dur": 0.668, + "args": { + "External id": 498779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274140985959.644, "dur": 0.777, + "args": { + "External id": 498780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140985981.316, "dur": 30208.863, + "args": { + "External id": 498781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140985997.123, "dur": 30185.200, + "args": { + "External id": 498782,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274140986016.183, "dur": 15.800, + "args": { + "External id": 498783,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274140986035.773, "dur": 30110.000, + "args": { + "External id": 498784,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274140986038.552, "dur": 30106.663, + "args": { + "External id": 498785,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274140986045.093, "dur": 6.254, + "args": { + "External id": 498786,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274140986087.697, "dur": 30054.417, + "args": { + "External id": 498787,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141016390.698, "dur": 32.792, + "args": { + "External id": 498788,"Sequence number": 5516033, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12968 + } + }, + { + "ph": "s", "id": 201, "pid": 2338711, "tid": 2338711, "ts": 6274141016390.698, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141016409.658, "dur": 9.035, + "args": { + "External id": 498789,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141016413.873, "dur": 4.565, + "args": { + "External id": 498790,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141016489.280, "dur": 70.615, + "args": { + "External id": 498791,"Record function id": 0, "Ev Idx": 12971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141016561.113, "dur": 1163.351, + "args": { + "External id": 498792,"Record function id": 0, "Ev Idx": 12972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141016616.318, "dur": 1092.897, + "args": { + "External id": 498793,"Sequence number": 5516034, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12973 + } + }, + { + "ph": "s", "id": 200, "pid": 2338711, "tid": 2338711, "ts": 6274141016616.318, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141016688.869, "dur": 55.053, + "args": { + "External id": 498794,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141016757.183, "dur": 107.348, + "args": { + "External id": 498795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141016876.805, "dur": 38.247, + "args": { + "External id": 498796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141016923.211, "dur": 29.280, + "args": { + "External id": 498797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141016979.351, "dur": 27.767, + "args": { + "External id": 498798,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141017027.015, "dur": 20.437, + "args": { + "External id": 498799,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141017107.055, "dur": 138.077, + "args": { + "External id": 498800,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141017159.853, "dur": 12.844, + "args": { + "External id": 498801,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141017165.801, "dur": 5.785, + "args": { + "External id": 498802,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141017175.523, "dur": 3.732, + "args": { + "External id": 498803,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141017180.852, "dur": 0.947, + "args": { + "External id": 498804,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141017184.286, "dur": 6.055, + "args": { + "External id": 498805,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141017260.432, "dur": 57.569, + "args": { + "External id": 498806,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141017351.618, "dur": 31.931, + "args": { + "External id": 498807,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141017395.612, "dur": 42.195, + "args": { + "External id": 498808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141017444.795, "dur": 34.429, + "args": { + "External id": 498809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141017503.115, "dur": 27.744, + "args": { + "External id": 498810,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141017539.312, "dur": 35.000, + "args": { + "External id": 498811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141017610.530, "dur": 21.210, + "args": { + "External id": 498812,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12992 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274141017793.883, "dur": 82.909, + "args": { + "External id": 498813,"Record function id": 0, "Ev Idx": 12993 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141017952.266, "dur": 47.646, + "args": { + "External id": 498814,"Record function id": 0, "Ev Idx": 12994 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274141018009.092, "dur": 32413.801, + "args": { + "External id": 498815,"Record function id": 0, "Ev Idx": 12995 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274141018017.706, "dur": 976.155, + "args": { + "External id": 498816,"Record function id": 0, "Ev Idx": 12996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141018139.081, "dur": 11.034, + "args": { + "External id": 498817,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141018166.970, "dur": 38.848, + "args": { + "External id": 498818,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018173.042, "dur": 2.540, + "args": { + "External id": 498819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018180.374, "dur": 0.815, + "args": { + "External id": 498820,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018182.351, "dur": 0.564, + "args": { + "External id": 498821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018184.392, "dur": 3.154, + "args": { + "External id": 498822,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018188.680, "dur": 0.448, + "args": { + "External id": 498823,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018190.644, "dur": 0.541, + "args": { + "External id": 498824,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018194.438, "dur": 2.537, + "args": { + "External id": 498825,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018197.948, "dur": 0.323, + "args": { + "External id": 498826,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018199.282, "dur": 0.528, + "args": { + "External id": 498827,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141018217.610, "dur": 55.218, + "args": { + "External id": 498828,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141018311.451, "dur": 127.453, + "args": { + "External id": 498829,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141018323.918, "dur": 4.165, + "args": { + "External id": 498830,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141018333.171, "dur": 14.618, + "args": { + "External id": 498831,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141018339.962, "dur": 7.408, + "args": { + "External id": 498832,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018343.678, "dur": 2.467, + "args": { + "External id": 498833,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141018353.888, "dur": 32.574, + "args": { + "External id": 498834,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018355.822, "dur": 0.518, + "args": { + "External id": 498835,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018361.153, "dur": 0.943, + "args": { + "External id": 498836,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018363.056, "dur": 0.464, + "args": { + "External id": 498837,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018367.160, "dur": 2.782, + "args": { + "External id": 498838,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018370.911, "dur": 0.276, + "args": { + "External id": 498839,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018372.598, "dur": 2.772, + "args": { + "External id": 498840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018376.547, "dur": 0.422, + "args": { + "External id": 498841,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018378.402, "dur": 0.317, + "args": { + "External id": 498842,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141018381.946, "dur": 0.463, + "args": { + "External id": 498843,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141018396.893, "dur": 34.393, + "args": { + "External id": 498844,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141018493.441, "dur": 397.476, + "args": { + "External id": 498845,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141018527.353, "dur": 358.155, + "args": { + "External id": 498846,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13026, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141018537.936, "dur": 340.952, + "args": { + "External id": 498847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141018916.711, "dur": 2.502, + "args": { + "External id": 498848,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13028, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274141019014.854, "dur": 31213.742, + "args": { + "External id": 498849,"Record function id": 0, "Ev Idx": 13029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019151.843, "dur": 6.803, + "args": { + "External id": 498850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019163.028, "dur": 1.458, + "args": { + "External id": 498851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019166.814, "dur": 3.175, + "args": { + "External id": 498852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019171.842, "dur": 1.103, + "args": { + "External id": 498853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019174.469, "dur": 0.910, + "args": { + "External id": 498854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019176.757, "dur": 1.175, + "args": { + "External id": 498855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019181.869, "dur": 0.900, + "args": { + "External id": 498856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019184.209, "dur": 1.745, + "args": { + "External id": 498857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019187.712, "dur": 0.764, + "args": { + "External id": 498858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141019190.034, "dur": 0.744, + "args": { + "External id": 498859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141019212.491, "dur": 30970.209, + "args": { + "External id": 498860,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141019228.446, "dur": 30946.633, + "args": { + "External id": 498861,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141019248.319, "dur": 17.212, + "args": { + "External id": 498862,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141019269.352, "dur": 30869.016, + "args": { + "External id": 498863,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141019272.236, "dur": 30865.453, + "args": { + "External id": 498864,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141019278.996, "dur": 5.983, + "args": { + "External id": 498865,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141019286.880, "dur": 30847.677, + "args": { + "External id": 498866,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141050368.269, "dur": 29.367, + "args": { + "External id": 498867,"Sequence number": 5516035, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13047 + } + }, + { + "ph": "s", "id": 199, "pid": 2338711, "tid": 2338711, "ts": 6274141050368.269, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141050384.006, "dur": 8.652, + "args": { + "External id": 498868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141050388.017, "dur": 4.363, + "args": { + "External id": 498869,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141050462.807, "dur": 70.749, + "args": { + "External id": 498870,"Record function id": 0, "Ev Idx": 13050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141050534.827, "dur": 1138.747, + "args": { + "External id": 498871,"Record function id": 0, "Ev Idx": 13051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141050574.830, "dur": 1083.852, + "args": { + "External id": 498872,"Sequence number": 5516036, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13052 + } + }, + { + "ph": "s", "id": 198, "pid": 2338711, "tid": 2338711, "ts": 6274141050574.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141050655.647, "dur": 50.011, + "args": { + "External id": 498873,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141050719.149, "dur": 101.525, + "args": { + "External id": 498874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141050836.850, "dur": 40.922, + "args": { + "External id": 498875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141050886.950, "dur": 29.212, + "args": { + "External id": 498876,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141050942.876, "dur": 26.426, + "args": { + "External id": 498877,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141050988.829, "dur": 17.708, + "args": { + "External id": 498878,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141051030.011, "dur": 180.849, + "args": { + "External id": 498879,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141051125.982, "dur": 12.930, + "args": { + "External id": 498880,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141051131.492, "dur": 6.334, + "args": { + "External id": 498881,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141051141.711, "dur": 5.066, + "args": { + "External id": 498882,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141051148.131, "dur": 1.097, + "args": { + "External id": 498883,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141051151.667, "dur": 4.662, + "args": { + "External id": 498884,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141051223.181, "dur": 54.216, + "args": { + "External id": 498885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141051309.690, "dur": 27.563, + "args": { + "External id": 498886,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141051347.350, "dur": 42.144, + "args": { + "External id": 498887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141051397.771, "dur": 35.392, + "args": { + "External id": 498888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141051462.864, "dur": 29.137, + "args": { + "External id": 498889,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141051499.741, "dur": 36.629, + "args": { + "External id": 498890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141051553.877, "dur": 18.949, + "args": { + "External id": 498891,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13071 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274141051744.338, "dur": 78.934, + "args": { + "External id": 498892,"Record function id": 0, "Ev Idx": 13072 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141051898.981, "dur": 45.889, + "args": { + "External id": 498893,"Record function id": 0, "Ev Idx": 13073 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274141051953.435, "dur": 30338.269, + "args": { + "External id": 498894,"Record function id": 0, "Ev Idx": 13074 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274141051960.761, "dur": 966.809, + "args": { + "External id": 498895,"Record function id": 0, "Ev Idx": 13075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141052041.758, "dur": 10.270, + "args": { + "External id": 498896,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141052104.942, "dur": 36.764, + "args": { + "External id": 498897,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052110.559, "dur": 2.680, + "args": { + "External id": 498898,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052117.403, "dur": 0.648, + "args": { + "External id": 498899,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052119.480, "dur": 0.443, + "args": { + "External id": 498900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052121.080, "dur": 0.687, + "args": { + "External id": 498901,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052124.945, "dur": 0.741, + "args": { + "External id": 498902,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052126.926, "dur": 0.559, + "args": { + "External id": 498903,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052128.228, "dur": 4.582, + "args": { + "External id": 498904,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052133.666, "dur": 0.345, + "args": { + "External id": 498905,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052135.314, "dur": 0.380, + "args": { + "External id": 498906,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141052152.933, "dur": 58.075, + "args": { + "External id": 498907,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141052249.815, "dur": 129.890, + "args": { + "External id": 498908,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141052263.491, "dur": 5.415, + "args": { + "External id": 498909,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141052274.180, "dur": 11.587, + "args": { + "External id": 498910,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141052278.844, "dur": 6.511, + "args": { + "External id": 498911,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052283.447, "dur": 0.612, + "args": { + "External id": 498912,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141052292.447, "dur": 34.147, + "args": { + "External id": 498913,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052294.715, "dur": 2.722, + "args": { + "External id": 498914,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052298.963, "dur": 0.332, + "args": { + "External id": 498915,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052301.131, "dur": 0.523, + "args": { + "External id": 498916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052305.263, "dur": 2.493, + "args": { + "External id": 498917,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052309.763, "dur": 0.328, + "args": { + "External id": 498918,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052311.908, "dur": 0.319, + "args": { + "External id": 498919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052315.388, "dur": 0.479, + "args": { + "External id": 498920,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052317.621, "dur": 0.413, + "args": { + "External id": 498921,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141052319.794, "dur": 2.358, + "args": { + "External id": 498922,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141052340.349, "dur": 31.331, + "args": { + "External id": 498923,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141052436.305, "dur": 388.987, + "args": { + "External id": 498924,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141052469.329, "dur": 350.733, + "args": { + "External id": 498925,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13105, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141052480.399, "dur": 333.991, + "args": { + "External id": 498926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141052850.532, "dur": 2.162, + "args": { + "External id": 498927,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13107, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274141052949.504, "dur": 29147.489, + "args": { + "External id": 498928,"Record function id": 0, "Ev Idx": 13108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053049.174, "dur": 41.153, + "args": { + "External id": 498929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053096.755, "dur": 1.268, + "args": { + "External id": 498930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053099.839, "dur": 3.585, + "args": { + "External id": 498931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053105.115, "dur": 0.874, + "args": { + "External id": 498932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053107.380, "dur": 1.030, + "args": { + "External id": 498933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053110.127, "dur": 0.964, + "args": { + "External id": 498934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053115.721, "dur": 0.695, + "args": { + "External id": 498935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053118.055, "dur": 2.204, + "args": { + "External id": 498936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053122.064, "dur": 0.844, + "args": { + "External id": 498937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141053124.674, "dur": 0.871, + "args": { + "External id": 498938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141053148.321, "dur": 28882.600, + "args": { + "External id": 498939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141053164.675, "dur": 28858.270, + "args": { + "External id": 498940,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141053182.365, "dur": 16.572, + "args": { + "External id": 498941,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141053202.755, "dur": 28785.015, + "args": { + "External id": 498942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141053205.916, "dur": 28781.209, + "args": { + "External id": 498943,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141053212.014, "dur": 5.322, + "args": { + "External id": 498944,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141053219.063, "dur": 28764.668, + "args": { + "External id": 498945,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141082235.185, "dur": 31.665, + "args": { + "External id": 498946,"Sequence number": 5516037, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13126 + } + }, + { + "ph": "s", "id": 197, "pid": 2338711, "tid": 2338711, "ts": 6274141082235.185, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141082252.694, "dur": 9.376, + "args": { + "External id": 498947,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141082257.080, "dur": 4.739, + "args": { + "External id": 498948,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141082331.548, "dur": 72.563, + "args": { + "External id": 498949,"Record function id": 0, "Ev Idx": 13129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141082405.638, "dur": 1133.462, + "args": { + "External id": 498950,"Record function id": 0, "Ev Idx": 13130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141082445.722, "dur": 1079.296, + "args": { + "External id": 498951,"Sequence number": 5516038, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13131 + } + }, + { + "ph": "s", "id": 196, "pid": 2338711, "tid": 2338711, "ts": 6274141082445.722, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141082513.110, "dur": 55.496, + "args": { + "External id": 498952,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141082581.200, "dur": 123.525, + "args": { + "External id": 498953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141082720.109, "dur": 38.062, + "args": { + "External id": 498954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141082768.120, "dur": 30.181, + "args": { + "External id": 498955,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141082825.494, "dur": 27.092, + "args": { + "External id": 498956,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141082872.851, "dur": 17.547, + "args": { + "External id": 498957,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141082913.587, "dur": 134.529, + "args": { + "External id": 498958,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141082964.510, "dur": 11.539, + "args": { + "External id": 498959,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141082970.033, "dur": 5.359, + "args": { + "External id": 498960,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141082979.067, "dur": 4.456, + "args": { + "External id": 498961,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141082985.106, "dur": 1.171, + "args": { + "External id": 498962,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141082989.014, "dur": 4.793, + "args": { + "External id": 498963,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141083100.699, "dur": 58.143, + "args": { + "External id": 498964,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141083193.361, "dur": 30.856, + "args": { + "External id": 498965,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141083236.384, "dur": 43.199, + "args": { + "External id": 498966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141083289.234, "dur": 34.683, + "args": { + "External id": 498967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141083347.436, "dur": 25.599, + "args": { + "External id": 498968,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141083380.966, "dur": 34.976, + "args": { + "External id": 498969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141083434.757, "dur": 18.062, + "args": { + "External id": 498970,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13150 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274141083627.845, "dur": 83.667, + "args": { + "External id": 498971,"Record function id": 0, "Ev Idx": 13151 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141083791.940, "dur": 48.401, + "args": { + "External id": 498972,"Record function id": 0, "Ev Idx": 13152 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274141083849.170, "dur": 32543.337, + "args": { + "External id": 498973,"Record function id": 0, "Ev Idx": 13153 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274141083858.266, "dur": 914.056, + "args": { + "External id": 498974,"Record function id": 0, "Ev Idx": 13154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141083942.501, "dur": 12.963, + "args": { + "External id": 498975,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141083968.540, "dur": 50.495, + "args": { + "External id": 498976,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141083978.821, "dur": 2.326, + "args": { + "External id": 498977,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141083985.444, "dur": 0.670, + "args": { + "External id": 498978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141083987.468, "dur": 0.336, + "args": { + "External id": 498979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141083989.334, "dur": 2.152, + "args": { + "External id": 498980,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141083992.934, "dur": 0.678, + "args": { + "External id": 498981,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141083997.051, "dur": 0.565, + "args": { + "External id": 498982,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084000.990, "dur": 2.505, + "args": { + "External id": 498983,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084005.081, "dur": 0.334, + "args": { + "External id": 498984,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084010.183, "dur": 1.952, + "args": { + "External id": 498985,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141084030.634, "dur": 96.949, + "args": { + "External id": 498986,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141084168.724, "dur": 127.482, + "args": { + "External id": 498987,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141084181.371, "dur": 5.679, + "args": { + "External id": 498988,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141084195.070, "dur": 11.255, + "args": { + "External id": 498989,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141084199.590, "dur": 6.318, + "args": { + "External id": 498990,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084203.699, "dur": 0.648, + "args": { + "External id": 498991,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141084213.786, "dur": 30.102, + "args": { + "External id": 498992,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084216.150, "dur": 0.413, + "args": { + "External id": 498993,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084218.747, "dur": 0.335, + "args": { + "External id": 498994,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084220.489, "dur": 2.056, + "args": { + "External id": 498995,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084224.288, "dur": 2.459, + "args": { + "External id": 498996,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084228.113, "dur": 0.284, + "args": { + "External id": 498997,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084231.500, "dur": 0.521, + "args": { + "External id": 498998,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084233.464, "dur": 0.361, + "args": { + "External id": 498999,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084235.726, "dur": 0.353, + "args": { + "External id": 499000,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141084239.460, "dur": 0.267, + "args": { + "External id": 499001,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141084255.154, "dur": 33.010, + "args": { + "External id": 499002,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141084354.497, "dur": 317.951, + "args": { + "External id": 499003,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141084386.658, "dur": 280.095, + "args": { + "External id": 499004,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13184, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141084396.669, "dur": 264.160, + "args": { + "External id": 499005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141084697.608, "dur": 2.380, + "args": { + "External id": 499006,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13186, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274141084793.122, "dur": 31402.377, + "args": { + "External id": 499007,"Record function id": 0, "Ev Idx": 13187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084887.821, "dur": 6.394, + "args": { + "External id": 499008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084897.470, "dur": 1.252, + "args": { + "External id": 499009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084900.522, "dur": 3.434, + "args": { + "External id": 499010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084905.796, "dur": 0.861, + "args": { + "External id": 499011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084908.431, "dur": 0.898, + "args": { + "External id": 499012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084910.522, "dur": 0.996, + "args": { + "External id": 499013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084915.738, "dur": 0.790, + "args": { + "External id": 499014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084918.009, "dur": 1.847, + "args": { + "External id": 499015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084921.576, "dur": 0.903, + "args": { + "External id": 499016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141084923.889, "dur": 0.751, + "args": { + "External id": 499017,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141084944.067, "dur": 31208.482, + "args": { + "External id": 499018,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141084959.508, "dur": 31185.527, + "args": { + "External id": 499019,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141084974.629, "dur": 15.369, + "args": { + "External id": 499020,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141084993.707, "dur": 31118.813, + "args": { + "External id": 499021,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141084996.605, "dur": 31115.307, + "args": { + "External id": 499022,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141085002.785, "dur": 5.432, + "args": { + "External id": 499023,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141085010.363, "dur": 31098.646, + "args": { + "External id": 499024,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141116333.641, "dur": 30.139, + "args": { + "External id": 499025,"Sequence number": 5516039, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13205 + } + }, + { + "ph": "s", "id": 195, "pid": 2338711, "tid": 2338711, "ts": 6274141116333.641, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141116350.507, "dur": 8.615, + "args": { + "External id": 499026,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141116354.528, "dur": 4.391, + "args": { + "External id": 499027,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141116433.041, "dur": 72.154, + "args": { + "External id": 499028,"Record function id": 0, "Ev Idx": 13208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141116506.460, "dur": 1134.778, + "args": { + "External id": 499029,"Record function id": 0, "Ev Idx": 13209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141116546.276, "dur": 1080.212, + "args": { + "External id": 499030,"Sequence number": 5516040, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13210 + } + }, + { + "ph": "s", "id": 194, "pid": 2338711, "tid": 2338711, "ts": 6274141116546.276, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141116627.119, "dur": 50.671, + "args": { + "External id": 499031,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141116692.238, "dur": 101.601, + "args": { + "External id": 499032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141116804.576, "dur": 38.318, + "args": { + "External id": 499033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141116852.958, "dur": 29.687, + "args": { + "External id": 499034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141116908.985, "dur": 26.472, + "args": { + "External id": 499035,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141116954.967, "dur": 16.117, + "args": { + "External id": 499036,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141116998.239, "dur": 184.270, + "args": { + "External id": 499037,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141117089.947, "dur": 14.520, + "args": { + "External id": 499038,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141117096.321, "dur": 7.032, + "args": { + "External id": 499039,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141117107.904, "dur": 3.962, + "args": { + "External id": 499040,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141117113.234, "dur": 1.056, + "args": { + "External id": 499041,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141117118.770, "dur": 4.627, + "args": { + "External id": 499042,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141117195.015, "dur": 52.080, + "args": { + "External id": 499043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141117279.872, "dur": 31.736, + "args": { + "External id": 499044,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141117322.342, "dur": 45.037, + "args": { + "External id": 499045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141117373.660, "dur": 34.471, + "args": { + "External id": 499046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141117432.018, "dur": 28.821, + "args": { + "External id": 499047,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141117466.491, "dur": 35.395, + "args": { + "External id": 499048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141117521.956, "dur": 17.298, + "args": { + "External id": 499049,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274141117709.873, "dur": 81.446, + "args": { + "External id": 499050,"Record function id": 0, "Ev Idx": 13230 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141117865.534, "dur": 48.313, + "args": { + "External id": 499051,"Record function id": 0, "Ev Idx": 13231 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274141117922.570, "dur": 32812.479, + "args": { + "External id": 499052,"Record function id": 0, "Ev Idx": 13232 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274141117931.441, "dur": 1048.839, + "args": { + "External id": 499053,"Record function id": 0, "Ev Idx": 13233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141118012.700, "dur": 9.257, + "args": { + "External id": 499054,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141118034.905, "dur": 80.178, + "args": { + "External id": 499055,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118040.722, "dur": 2.444, + "args": { + "External id": 499056,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118047.565, "dur": 0.460, + "args": { + "External id": 499057,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118049.853, "dur": 0.462, + "args": { + "External id": 499058,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118052.021, "dur": 34.423, + "args": { + "External id": 499059,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118091.124, "dur": 0.747, + "args": { + "External id": 499060,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118093.301, "dur": 0.594, + "args": { + "External id": 499061,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118095.335, "dur": 8.812, + "args": { + "External id": 499062,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118105.550, "dur": 0.441, + "args": { + "External id": 499063,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118107.649, "dur": 0.433, + "args": { + "External id": 499064,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141118127.595, "dur": 59.982, + "args": { + "External id": 499065,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141118228.301, "dur": 148.461, + "args": { + "External id": 499066,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141118240.644, "dur": 5.201, + "args": { + "External id": 499067,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141118251.418, "dur": 11.514, + "args": { + "External id": 499068,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141118256.220, "dur": 6.337, + "args": { + "External id": 499069,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118260.707, "dur": 0.691, + "args": { + "External id": 499070,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141118270.523, "dur": 34.287, + "args": { + "External id": 499071,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118274.157, "dur": 2.770, + "args": { + "External id": 499072,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118278.883, "dur": 0.475, + "args": { + "External id": 499073,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118280.776, "dur": 0.586, + "args": { + "External id": 499074,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118284.729, "dur": 2.445, + "args": { + "External id": 499075,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118288.733, "dur": 0.443, + "args": { + "External id": 499076,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118290.450, "dur": 0.647, + "args": { + "External id": 499077,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118294.325, "dur": 0.435, + "args": { + "External id": 499078,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118296.633, "dur": 0.420, + "args": { + "External id": 499079,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141118298.466, "dur": 2.025, + "args": { + "External id": 499080,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141118326.331, "dur": 42.122, + "args": { + "External id": 499081,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141118434.931, "dur": 439.603, + "args": { + "External id": 499082,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141118466.793, "dur": 401.552, + "args": { + "External id": 499083,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13263, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141118479.496, "dur": 382.129, + "args": { + "External id": 499084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141118902.001, "dur": 3.220, + "args": { + "External id": 499085,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13265, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274141119002.559, "dur": 31512.976, + "args": { + "External id": 499086,"Record function id": 0, "Ev Idx": 13266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119138.736, "dur": 7.221, + "args": { + "External id": 499087,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119149.918, "dur": 1.207, + "args": { + "External id": 499088,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119153.004, "dur": 2.887, + "args": { + "External id": 499089,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119157.696, "dur": 0.970, + "args": { + "External id": 499090,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119159.914, "dur": 1.284, + "args": { + "External id": 499091,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119165.351, "dur": 1.051, + "args": { + "External id": 499092,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119168.065, "dur": 0.900, + "args": { + "External id": 499093,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119170.603, "dur": 2.286, + "args": { + "External id": 499094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119174.300, "dur": 0.816, + "args": { + "External id": 499095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141119179.164, "dur": 0.922, + "args": { + "External id": 499096,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141119200.435, "dur": 31265.386, + "args": { + "External id": 499097,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141119218.261, "dur": 31239.693, + "args": { + "External id": 499098,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141119237.608, "dur": 15.552, + "args": { + "External id": 499099,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141119256.758, "dur": 31162.466, + "args": { + "External id": 499100,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141119259.636, "dur": 31158.972, + "args": { + "External id": 499101,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141119266.197, "dur": 5.110, + "args": { + "External id": 499102,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141119273.032, "dur": 31142.529, + "args": { + "External id": 499103,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141150671.107, "dur": 33.620, + "args": { + "External id": 499104,"Sequence number": 5516041, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13284 + } + }, + { + "ph": "s", "id": 193, "pid": 2338711, "tid": 2338711, "ts": 6274141150671.107, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141150691.247, "dur": 8.694, + "args": { + "External id": 499105,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141150695.108, "dur": 4.446, + "args": { + "External id": 499106,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141150778.563, "dur": 71.955, + "args": { + "External id": 499107,"Record function id": 0, "Ev Idx": 13287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141150852.068, "dur": 1117.312, + "args": { + "External id": 499108,"Record function id": 0, "Ev Idx": 13288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141150890.559, "dur": 1064.746, + "args": { + "External id": 499109,"Sequence number": 5516042, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13289 + } + }, + { + "ph": "s", "id": 192, "pid": 2338711, "tid": 2338711, "ts": 6274141150890.559, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141150959.654, "dur": 50.442, + "args": { + "External id": 499110,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141151023.267, "dur": 133.119, + "args": { + "External id": 499111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141151171.506, "dur": 39.339, + "args": { + "External id": 499112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141151220.962, "dur": 29.863, + "args": { + "External id": 499113,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141151279.194, "dur": 30.876, + "args": { + "External id": 499114,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141151330.661, "dur": 16.720, + "args": { + "External id": 499115,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141151371.814, "dur": 135.369, + "args": { + "External id": 499116,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141151425.010, "dur": 12.692, + "args": { + "External id": 499117,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141151430.952, "dur": 5.917, + "args": { + "External id": 499118,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141151441.162, "dur": 3.913, + "args": { + "External id": 499119,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141151446.352, "dur": 1.233, + "args": { + "External id": 499120,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141151450.021, "dur": 4.787, + "args": { + "External id": 499121,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141151517.783, "dur": 48.410, + "args": { + "External id": 499122,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141151612.027, "dur": 32.721, + "args": { + "External id": 499123,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141151657.311, "dur": 46.279, + "args": { + "External id": 499124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141151712.158, "dur": 35.073, + "args": { + "External id": 499125,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141151771.924, "dur": 26.654, + "args": { + "External id": 499126,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141151806.133, "dur": 35.693, + "args": { + "External id": 499127,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141151858.969, "dur": 19.460, + "args": { + "External id": 499128,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13308 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274141152038.620, "dur": 119.510, + "args": { + "External id": 499129,"Record function id": 0, "Ev Idx": 13309 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141152236.278, "dur": 48.589, + "args": { + "External id": 499130,"Record function id": 0, "Ev Idx": 13310 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274141152298.029, "dur": 32535.081, + "args": { + "External id": 499131,"Record function id": 0, "Ev Idx": 13311 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274141152305.690, "dur": 992.337, + "args": { + "External id": 499132,"Record function id": 0, "Ev Idx": 13312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141152389.889, "dur": 8.918, + "args": { + "External id": 499133,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141152412.381, "dur": 38.668, + "args": { + "External id": 499134,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152418.391, "dur": 2.239, + "args": { + "External id": 499135,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152425.796, "dur": 0.407, + "args": { + "External id": 499136,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152427.530, "dur": 0.297, + "args": { + "External id": 499137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152430.108, "dur": 0.463, + "args": { + "External id": 499138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152434.355, "dur": 0.384, + "args": { + "External id": 499139,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152436.271, "dur": 0.419, + "args": { + "External id": 499140,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152438.279, "dur": 2.601, + "args": { + "External id": 499141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152442.637, "dur": 0.272, + "args": { + "External id": 499142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152444.146, "dur": 0.294, + "args": { + "External id": 499143,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141152468.583, "dur": 58.613, + "args": { + "External id": 499144,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141152561.345, "dur": 154.921, + "args": { + "External id": 499145,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141152572.151, "dur": 5.847, + "args": { + "External id": 499146,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141152602.073, "dur": 13.321, + "args": { + "External id": 499147,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141152606.936, "dur": 8.043, + "args": { + "External id": 499148,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152611.949, "dur": 1.101, + "args": { + "External id": 499149,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141152624.247, "dur": 33.599, + "args": { + "External id": 499150,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152626.952, "dur": 0.527, + "args": { + "External id": 499151,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152629.198, "dur": 2.374, + "args": { + "External id": 499152,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152633.276, "dur": 0.436, + "args": { + "External id": 499153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152635.821, "dur": 3.073, + "args": { + "External id": 499154,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152642.589, "dur": 0.349, + "args": { + "External id": 499155,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152644.532, "dur": 0.468, + "args": { + "External id": 499156,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152646.809, "dur": 0.622, + "args": { + "External id": 499157,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152650.876, "dur": 0.342, + "args": { + "External id": 499158,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141152652.643, "dur": 0.763, + "args": { + "External id": 499159,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141152670.419, "dur": 37.175, + "args": { + "External id": 499160,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141152771.982, "dur": 416.754, + "args": { + "External id": 499161,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141152805.292, "dur": 377.192, + "args": { + "External id": 499162,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13342, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141152815.413, "dur": 360.103, + "args": { + "External id": 499163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141153215.631, "dur": 2.299, + "args": { + "External id": 499164,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13344, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274141153321.512, "dur": 31310.728, + "args": { + "External id": 499165,"Record function id": 0, "Ev Idx": 13345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153416.346, "dur": 6.964, + "args": { + "External id": 499166,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153426.751, "dur": 1.174, + "args": { + "External id": 499167,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153429.511, "dur": 3.028, + "args": { + "External id": 499168,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153434.117, "dur": 1.036, + "args": { + "External id": 499169,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153436.652, "dur": 0.793, + "args": { + "External id": 499170,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153438.972, "dur": 0.727, + "args": { + "External id": 499171,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153443.177, "dur": 0.815, + "args": { + "External id": 499172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153445.441, "dur": 2.276, + "args": { + "External id": 499173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153449.372, "dur": 0.879, + "args": { + "External id": 499174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141153451.559, "dur": 0.680, + "args": { + "External id": 499175,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141153473.697, "dur": 31101.591, + "args": { + "External id": 499176,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141153490.114, "dur": 31076.682, + "args": { + "External id": 499177,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141153511.385, "dur": 16.527, + "args": { + "External id": 499178,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141153531.743, "dur": 30999.433, + "args": { + "External id": 499179,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141153534.507, "dur": 30995.951, + "args": { + "External id": 499180,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141153540.512, "dur": 5.877, + "args": { + "External id": 499181,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141153548.108, "dur": 30978.994, + "args": { + "External id": 499182,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141184771.827, "dur": 32.289, + "args": { + "External id": 499183,"Sequence number": 5516043, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13363 + } + }, + { + "ph": "s", "id": 191, "pid": 2338711, "tid": 2338711, "ts": 6274141184771.827, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141184789.792, "dur": 9.435, + "args": { + "External id": 499184,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141184794.229, "dur": 4.795, + "args": { + "External id": 499185,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141184873.474, "dur": 71.938, + "args": { + "External id": 499186,"Record function id": 0, "Ev Idx": 13366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141184947.061, "dur": 1188.395, + "args": { + "External id": 499187,"Record function id": 0, "Ev Idx": 13367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141184987.018, "dur": 1132.380, + "args": { + "External id": 499188,"Sequence number": 5516044, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13368 + } + }, + { + "ph": "s", "id": 190, "pid": 2338711, "tid": 2338711, "ts": 6274141184987.018, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141185052.470, "dur": 79.602, + "args": { + "External id": 499189,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141185148.908, "dur": 107.266, + "args": { + "External id": 499190,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141185267.382, "dur": 40.238, + "args": { + "External id": 499191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141185317.981, "dur": 30.302, + "args": { + "External id": 499192,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141185380.713, "dur": 30.224, + "args": { + "External id": 499193,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141185429.814, "dur": 16.838, + "args": { + "External id": 499194,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141185472.638, "dur": 152.533, + "args": { + "External id": 499195,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141185523.428, "dur": 12.887, + "args": { + "External id": 499196,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141185529.782, "dur": 5.740, + "args": { + "External id": 499197,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141185539.548, "dur": 4.521, + "args": { + "External id": 499198,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141185545.542, "dur": 1.182, + "args": { + "External id": 499199,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141185549.107, "dur": 6.376, + "args": { + "External id": 499200,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141185660.539, "dur": 57.359, + "args": { + "External id": 499201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141185754.666, "dur": 30.827, + "args": { + "External id": 499202,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141185795.987, "dur": 44.335, + "args": { + "External id": 499203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141185850.296, "dur": 35.284, + "args": { + "External id": 499204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141185907.371, "dur": 24.103, + "args": { + "External id": 499205,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141185936.905, "dur": 35.059, + "args": { + "External id": 499206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141185994.222, "dur": 17.418, + "args": { + "External id": 499207,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274141186205.567, "dur": 80.555, + "args": { + "External id": 499208,"Record function id": 0, "Ev Idx": 13388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141186360.504, "dur": 47.438, + "args": { + "External id": 499209,"Record function id": 0, "Ev Idx": 13389 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274141186416.649, "dur": 31198.663, + "args": { + "External id": 499210,"Record function id": 0, "Ev Idx": 13390 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274141186424.753, "dur": 980.805, + "args": { + "External id": 499211,"Record function id": 0, "Ev Idx": 13391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141186508.566, "dur": 9.449, + "args": { + "External id": 499212,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141186533.833, "dur": 40.393, + "args": { + "External id": 499213,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186539.707, "dur": 2.484, + "args": { + "External id": 499214,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186547.943, "dur": 0.374, + "args": { + "External id": 499215,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186549.545, "dur": 0.535, + "args": { + "External id": 499216,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186551.691, "dur": 0.400, + "args": { + "External id": 499217,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186555.253, "dur": 0.561, + "args": { + "External id": 499218,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186557.197, "dur": 0.578, + "args": { + "External id": 499219,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186559.630, "dur": 3.834, + "args": { + "External id": 499220,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186565.026, "dur": 0.613, + "args": { + "External id": 499221,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186567.070, "dur": 0.479, + "args": { + "External id": 499222,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141186605.594, "dur": 60.349, + "args": { + "External id": 499223,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141186706.143, "dur": 132.773, + "args": { + "External id": 499224,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141186718.390, "dur": 5.233, + "args": { + "External id": 499225,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141186728.809, "dur": 11.232, + "args": { + "External id": 499226,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141186733.444, "dur": 6.203, + "args": { + "External id": 499227,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186737.409, "dur": 0.743, + "args": { + "External id": 499228,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141186747.938, "dur": 33.913, + "args": { + "External id": 499229,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186750.760, "dur": 2.798, + "args": { + "External id": 499230,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186755.430, "dur": 0.433, + "args": { + "External id": 499231,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186757.946, "dur": 0.304, + "args": { + "External id": 499232,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186761.598, "dur": 2.942, + "args": { + "External id": 499233,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186766.189, "dur": 0.300, + "args": { + "External id": 499234,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186768.040, "dur": 0.243, + "args": { + "External id": 499235,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186771.414, "dur": 0.245, + "args": { + "External id": 499236,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186773.205, "dur": 0.565, + "args": { + "External id": 499237,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141186775.395, "dur": 2.224, + "args": { + "External id": 499238,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141186795.639, "dur": 35.172, + "args": { + "External id": 499239,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141186896.922, "dur": 401.487, + "args": { + "External id": 499240,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141186928.998, "dur": 363.521, + "args": { + "External id": 499241,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13421, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141186939.172, "dur": 344.849, + "args": { + "External id": 499242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141187326.030, "dur": 2.503, + "args": { + "External id": 499243,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13423, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274141187426.977, "dur": 29980.802, + "args": { + "External id": 499244,"Record function id": 0, "Ev Idx": 13424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187525.202, "dur": 6.518, + "args": { + "External id": 499245,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187535.150, "dur": 1.089, + "args": { + "External id": 499246,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187537.907, "dur": 2.991, + "args": { + "External id": 499247,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187542.848, "dur": 0.678, + "args": { + "External id": 499248,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187544.780, "dur": 0.565, + "args": { + "External id": 499249,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187546.951, "dur": 0.974, + "args": { + "External id": 499250,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187551.510, "dur": 0.773, + "args": { + "External id": 499251,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187554.104, "dur": 2.345, + "args": { + "External id": 499252,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187558.222, "dur": 0.755, + "args": { + "External id": 499253,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141187560.765, "dur": 0.931, + "args": { + "External id": 499254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141187583.177, "dur": 29777.411, + "args": { + "External id": 499255,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141187617.335, "dur": 29735.375, + "args": { + "External id": 499256,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141187643.155, "dur": 18.361, + "args": { + "External id": 499257,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141187665.379, "dur": 29652.849, + "args": { + "External id": 499258,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141187668.015, "dur": 29649.564, + "args": { + "External id": 499259,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141187674.484, "dur": 5.482, + "args": { + "External id": 499260,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141187681.877, "dur": 29632.576, + "args": { + "External id": 499261,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141217543.724, "dur": 30.961, + "args": { + "External id": 499262,"Sequence number": 5516045, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13442 + } + }, + { + "ph": "s", "id": 189, "pid": 2338711, "tid": 2338711, "ts": 6274141217543.724, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141217560.651, "dur": 8.958, + "args": { + "External id": 499263,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141217564.947, "dur": 4.425, + "args": { + "External id": 499264,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141217659.079, "dur": 71.297, + "args": { + "External id": 499265,"Record function id": 0, "Ev Idx": 13445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141217731.843, "dur": 1128.450, + "args": { + "External id": 499266,"Record function id": 0, "Ev Idx": 13446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141217773.330, "dur": 1072.877, + "args": { + "External id": 499267,"Sequence number": 5516046, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13447 + } + }, + { + "ph": "s", "id": 188, "pid": 2338711, "tid": 2338711, "ts": 6274141217773.330, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141217841.821, "dur": 48.707, + "args": { + "External id": 499268,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141217903.499, "dur": 106.311, + "args": { + "External id": 499269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141218021.522, "dur": 72.228, + "args": { + "External id": 499270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141218110.074, "dur": 34.814, + "args": { + "External id": 499271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141218173.670, "dur": 27.843, + "args": { + "External id": 499272,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141218220.536, "dur": 16.415, + "args": { + "External id": 499273,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141218260.696, "dur": 136.575, + "args": { + "External id": 499274,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141218313.478, "dur": 13.207, + "args": { + "External id": 499275,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141218319.918, "dur": 5.687, + "args": { + "External id": 499276,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141218330.113, "dur": 4.514, + "args": { + "External id": 499277,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141218336.147, "dur": 1.295, + "args": { + "External id": 499278,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141218340.172, "dur": 4.884, + "args": { + "External id": 499279,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141218407.702, "dur": 48.377, + "args": { + "External id": 499280,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141218487.086, "dur": 27.703, + "args": { + "External id": 499281,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141218525.726, "dur": 42.217, + "args": { + "External id": 499282,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141218577.246, "dur": 51.739, + "args": { + "External id": 499283,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141218656.615, "dur": 27.519, + "args": { + "External id": 499284,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141218692.759, "dur": 36.821, + "args": { + "External id": 499285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141218747.441, "dur": 20.015, + "args": { + "External id": 499286,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13466 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274141218927.300, "dur": 78.781, + "args": { + "External id": 499287,"Record function id": 0, "Ev Idx": 13467 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141219121.256, "dur": 50.009, + "args": { + "External id": 499288,"Record function id": 0, "Ev Idx": 13468 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274141219181.810, "dur": 32736.045, + "args": { + "External id": 499289,"Record function id": 0, "Ev Idx": 13469 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274141219191.345, "dur": 980.000, + "args": { + "External id": 499290,"Record function id": 0, "Ev Idx": 13470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141219275.325, "dur": 9.507, + "args": { + "External id": 499291,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141219298.439, "dur": 41.287, + "args": { + "External id": 499292,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219304.478, "dur": 2.367, + "args": { + "External id": 499293,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219311.178, "dur": 0.349, + "args": { + "External id": 499294,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219313.152, "dur": 0.693, + "args": { + "External id": 499295,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219315.653, "dur": 0.661, + "args": { + "External id": 499296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219319.788, "dur": 0.376, + "args": { + "External id": 499297,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219321.417, "dur": 0.568, + "args": { + "External id": 499298,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219323.781, "dur": 4.947, + "args": { + "External id": 499299,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219330.181, "dur": 0.418, + "args": { + "External id": 499300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219332.527, "dur": 0.286, + "args": { + "External id": 499301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141219352.719, "dur": 57.304, + "args": { + "External id": 499302,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141219443.707, "dur": 129.894, + "args": { + "External id": 499303,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141219459.493, "dur": 6.051, + "args": { + "External id": 499304,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141219470.909, "dur": 11.059, + "args": { + "External id": 499305,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141219475.471, "dur": 6.076, + "args": { + "External id": 499306,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219479.402, "dur": 0.802, + "args": { + "External id": 499307,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141219490.013, "dur": 31.766, + "args": { + "External id": 499308,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219492.498, "dur": 0.486, + "args": { + "External id": 499309,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219494.667, "dur": 2.339, + "args": { + "External id": 499310,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219498.461, "dur": 0.681, + "args": { + "External id": 499311,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219500.993, "dur": 2.716, + "args": { + "External id": 499312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219507.696, "dur": 0.307, + "args": { + "External id": 499313,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219509.176, "dur": 0.563, + "args": { + "External id": 499314,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219511.543, "dur": 0.399, + "args": { + "External id": 499315,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219514.808, "dur": 0.538, + "args": { + "External id": 499316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141219517.238, "dur": 0.366, + "args": { + "External id": 499317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141219532.453, "dur": 33.126, + "args": { + "External id": 499318,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141219648.079, "dur": 383.025, + "args": { + "External id": 499319,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141219680.475, "dur": 345.344, + "args": { + "External id": 499320,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13500, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141219691.024, "dur": 329.029, + "args": { + "External id": 499321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141220089.653, "dur": 3.388, + "args": { + "External id": 499322,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13502, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274141220195.249, "dur": 31523.477, + "args": { + "External id": 499323,"Record function id": 0, "Ev Idx": 13503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220294.797, "dur": 7.218, + "args": { + "External id": 499324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220305.344, "dur": 1.205, + "args": { + "External id": 499325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220308.229, "dur": 3.459, + "args": { + "External id": 499326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220313.442, "dur": 0.994, + "args": { + "External id": 499327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220315.747, "dur": 0.983, + "args": { + "External id": 499328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220318.164, "dur": 0.751, + "args": { + "External id": 499329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220320.554, "dur": 0.948, + "args": { + "External id": 499330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220323.073, "dur": 2.201, + "args": { + "External id": 499331,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220327.144, "dur": 0.598, + "args": { + "External id": 499332,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141220331.382, "dur": 0.953, + "args": { + "External id": 499333,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141220351.726, "dur": 31324.210, + "args": { + "External id": 499334,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141220367.828, "dur": 31300.073, + "args": { + "External id": 499335,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141220386.592, "dur": 16.760, + "args": { + "External id": 499336,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141220407.141, "dur": 31225.525, + "args": { + "External id": 499337,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141220409.629, "dur": 31222.405, + "args": { + "External id": 499338,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141220416.162, "dur": 5.135, + "args": { + "External id": 499339,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141220423.372, "dur": 31205.237, + "args": { + "External id": 499340,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141251862.235, "dur": 31.039, + "args": { + "External id": 499341,"Sequence number": 5516047, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13521 + } + }, + { + "ph": "s", "id": 187, "pid": 2338711, "tid": 2338711, "ts": 6274141251862.235, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141251879.425, "dur": 9.043, + "args": { + "External id": 499342,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141251884.124, "dur": 4.151, + "args": { + "External id": 499343,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141251958.540, "dur": 69.161, + "args": { + "External id": 499344,"Record function id": 0, "Ev Idx": 13524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141252029.138, "dur": 1168.200, + "args": { + "External id": 499345,"Record function id": 0, "Ev Idx": 13525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141252097.695, "dur": 1084.862, + "args": { + "External id": 499346,"Sequence number": 5516048, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13526 + } + }, + { + "ph": "s", "id": 186, "pid": 2338711, "tid": 2338711, "ts": 6274141252097.695, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141252172.826, "dur": 50.548, + "args": { + "External id": 499347,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141252238.887, "dur": 106.685, + "args": { + "External id": 499348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141252357.818, "dur": 40.865, + "args": { + "External id": 499349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141252405.236, "dur": 30.097, + "args": { + "External id": 499350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141252465.630, "dur": 25.702, + "args": { + "External id": 499351,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141252508.940, "dur": 19.560, + "args": { + "External id": 499352,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141252550.290, "dur": 157.075, + "args": { + "External id": 499353,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141252620.433, "dur": 13.870, + "args": { + "External id": 499354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141252626.874, "dur": 6.431, + "args": { + "External id": 499355,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141252637.357, "dur": 4.028, + "args": { + "External id": 499356,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141252642.925, "dur": 3.148, + "args": { + "External id": 499357,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141252648.679, "dur": 4.861, + "args": { + "External id": 499358,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141252719.286, "dur": 52.461, + "args": { + "External id": 499359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141252804.393, "dur": 27.961, + "args": { + "External id": 499360,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141252842.708, "dur": 42.395, + "args": { + "External id": 499361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141252893.349, "dur": 38.537, + "args": { + "External id": 499362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141252955.516, "dur": 32.996, + "args": { + "External id": 499363,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141252993.825, "dur": 35.779, + "args": { + "External id": 499364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141253049.122, "dur": 57.224, + "args": { + "External id": 499365,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274141253264.193, "dur": 83.839, + "args": { + "External id": 499366,"Record function id": 0, "Ev Idx": 13546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141253423.341, "dur": 47.431, + "args": { + "External id": 499367,"Record function id": 0, "Ev Idx": 13547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274141253480.353, "dur": 31448.382, + "args": { + "External id": 499368,"Record function id": 0, "Ev Idx": 13548 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274141253488.077, "dur": 963.549, + "args": { + "External id": 499369,"Record function id": 0, "Ev Idx": 13549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141253567.648, "dur": 9.325, + "args": { + "External id": 499370,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141253606.775, "dur": 40.649, + "args": { + "External id": 499371,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253612.879, "dur": 2.433, + "args": { + "External id": 499372,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253619.638, "dur": 0.600, + "args": { + "External id": 499373,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253621.514, "dur": 0.640, + "args": { + "External id": 499374,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253623.694, "dur": 0.583, + "args": { + "External id": 499375,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253628.249, "dur": 0.610, + "args": { + "External id": 499376,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253630.775, "dur": 0.594, + "args": { + "External id": 499377,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253633.154, "dur": 3.979, + "args": { + "External id": 499378,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253638.770, "dur": 0.367, + "args": { + "External id": 499379,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253641.001, "dur": 0.340, + "args": { + "External id": 499380,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141253659.019, "dur": 53.909, + "args": { + "External id": 499381,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141253748.932, "dur": 126.622, + "args": { + "External id": 499382,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141253760.593, "dur": 5.045, + "args": { + "External id": 499383,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141253770.959, "dur": 10.596, + "args": { + "External id": 499384,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141253775.445, "dur": 5.716, + "args": { + "External id": 499385,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253779.293, "dur": 0.729, + "args": { + "External id": 499386,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141253788.362, "dur": 35.236, + "args": { + "External id": 499387,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253791.077, "dur": 2.301, + "args": { + "External id": 499388,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253794.977, "dur": 0.579, + "args": { + "External id": 499389,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253797.316, "dur": 0.673, + "args": { + "External id": 499390,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253802.548, "dur": 2.476, + "args": { + "External id": 499391,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253806.411, "dur": 0.621, + "args": { + "External id": 499392,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253808.566, "dur": 0.287, + "args": { + "External id": 499393,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253812.242, "dur": 0.644, + "args": { + "External id": 499394,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253814.317, "dur": 0.558, + "args": { + "External id": 499395,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141253816.433, "dur": 2.743, + "args": { + "External id": 499396,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141253836.856, "dur": 31.073, + "args": { + "External id": 499397,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141253932.773, "dur": 413.089, + "args": { + "External id": 499398,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141253965.782, "dur": 374.565, + "args": { + "External id": 499399,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13579, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141253976.176, "dur": 357.386, + "args": { + "External id": 499400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141254372.186, "dur": 2.663, + "args": { + "External id": 499401,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13581, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274141254474.191, "dur": 30260.588, + "args": { + "External id": 499402,"Record function id": 0, "Ev Idx": 13582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254571.386, "dur": 6.654, + "args": { + "External id": 499403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254609.225, "dur": 1.916, + "args": { + "External id": 499404,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254614.883, "dur": 3.282, + "args": { + "External id": 499405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254621.996, "dur": 0.980, + "args": { + "External id": 499406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254624.507, "dur": 0.737, + "args": { + "External id": 499407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254626.509, "dur": 1.363, + "args": { + "External id": 499408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254629.489, "dur": 0.878, + "args": { + "External id": 499409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254634.070, "dur": 2.234, + "args": { + "External id": 499410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254638.083, "dur": 0.581, + "args": { + "External id": 499411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141254640.146, "dur": 0.617, + "args": { + "External id": 499412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141254661.614, "dur": 30028.750, + "args": { + "External id": 499413,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141254678.471, "dur": 30003.436, + "args": { + "External id": 499414,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141254693.571, "dur": 16.486, + "args": { + "External id": 499415,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141254715.994, "dur": 29929.901, + "args": { + "External id": 499416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141254718.833, "dur": 29926.462, + "args": { + "External id": 499417,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141254725.331, "dur": 5.687, + "args": { + "External id": 499418,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141254732.791, "dur": 29909.325, + "args": { + "External id": 499419,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141284872.048, "dur": 32.424, + "args": { + "External id": 499420,"Sequence number": 5516049, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13600 + } + }, + { + "ph": "s", "id": 185, "pid": 2338711, "tid": 2338711, "ts": 6274141284872.048, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141284890.330, "dur": 9.462, + "args": { + "External id": 499421,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141284894.791, "dur": 4.787, + "args": { + "External id": 499422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141284971.792, "dur": 76.320, + "args": { + "External id": 499423,"Record function id": 0, "Ev Idx": 13603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141285049.482, "dur": 1169.033, + "args": { + "External id": 499424,"Record function id": 0, "Ev Idx": 13604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141285120.387, "dur": 1083.782, + "args": { + "External id": 499425,"Sequence number": 5516050, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13605 + } + }, + { + "ph": "s", "id": 184, "pid": 2338711, "tid": 2338711, "ts": 6274141285120.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141285194.123, "dur": 53.874, + "args": { + "External id": 499426,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141285262.810, "dur": 107.489, + "args": { + "External id": 499427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141285382.299, "dur": 37.713, + "args": { + "External id": 499428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141285428.214, "dur": 29.854, + "args": { + "External id": 499429,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141285483.234, "dur": 27.727, + "args": { + "External id": 499430,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141285533.177, "dur": 16.983, + "args": { + "External id": 499431,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141285573.714, "dur": 153.477, + "args": { + "External id": 499432,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141285643.951, "dur": 13.052, + "args": { + "External id": 499433,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141285649.868, "dur": 6.150, + "args": { + "External id": 499434,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141285659.875, "dur": 4.146, + "args": { + "External id": 499435,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141285665.688, "dur": 1.210, + "args": { + "External id": 499436,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141285669.789, "dur": 5.280, + "args": { + "External id": 499437,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141285738.433, "dur": 53.402, + "args": { + "External id": 499438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141285823.656, "dur": 31.519, + "args": { + "External id": 499439,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141285864.359, "dur": 42.566, + "args": { + "External id": 499440,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141285916.749, "dur": 35.250, + "args": { + "External id": 499441,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141285973.093, "dur": 26.631, + "args": { + "External id": 499442,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141286007.166, "dur": 35.671, + "args": { + "External id": 499443,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141286097.453, "dur": 24.715, + "args": { + "External id": 499444,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13624 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274141286287.418, "dur": 80.352, + "args": { + "External id": 499445,"Record function id": 0, "Ev Idx": 13625 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141286442.666, "dur": 47.805, + "args": { + "External id": 499446,"Record function id": 0, "Ev Idx": 13626 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274141286499.894, "dur": 31699.935, + "args": { + "External id": 499447,"Record function id": 0, "Ev Idx": 13627 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274141286509.969, "dur": 1035.693, + "args": { + "External id": 499448,"Record function id": 0, "Ev Idx": 13628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141286608.180, "dur": 10.219, + "args": { + "External id": 499449,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141286632.664, "dur": 41.202, + "args": { + "External id": 499450,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286638.847, "dur": 2.767, + "args": { + "External id": 499451,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286646.227, "dur": 0.386, + "args": { + "External id": 499452,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286648.370, "dur": 0.613, + "args": { + "External id": 499453,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286651.305, "dur": 0.536, + "args": { + "External id": 499454,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286655.577, "dur": 0.515, + "args": { + "External id": 499455,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286657.447, "dur": 0.429, + "args": { + "External id": 499456,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286659.247, "dur": 4.458, + "args": { + "External id": 499457,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286665.100, "dur": 0.562, + "args": { + "External id": 499458,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286667.008, "dur": 0.431, + "args": { + "External id": 499459,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141286685.346, "dur": 53.383, + "args": { + "External id": 499460,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141286774.187, "dur": 125.245, + "args": { + "External id": 499461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141286785.079, "dur": 4.347, + "args": { + "External id": 499462,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141286794.326, "dur": 10.659, + "args": { + "External id": 499463,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141286798.639, "dur": 5.966, + "args": { + "External id": 499464,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286802.723, "dur": 0.631, + "args": { + "External id": 499465,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141286812.145, "dur": 38.024, + "args": { + "External id": 499466,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286814.485, "dur": 2.794, + "args": { + "External id": 499467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286819.094, "dur": 0.608, + "args": { + "External id": 499468,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286825.274, "dur": 0.626, + "args": { + "External id": 499469,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286829.070, "dur": 3.096, + "args": { + "External id": 499470,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286833.411, "dur": 0.483, + "args": { + "External id": 499471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286835.186, "dur": 2.574, + "args": { + "External id": 499472,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286839.220, "dur": 0.370, + "args": { + "External id": 499473,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286841.323, "dur": 0.307, + "args": { + "External id": 499474,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141286845.527, "dur": 0.426, + "args": { + "External id": 499475,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141286861.067, "dur": 30.695, + "args": { + "External id": 499476,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141286955.687, "dur": 475.902, + "args": { + "External id": 499477,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141286989.094, "dur": 436.317, + "args": { + "External id": 499478,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13658, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141287000.062, "dur": 418.862, + "args": { + "External id": 499479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141287461.967, "dur": 2.707, + "args": { + "External id": 499480,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13660, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274141287568.126, "dur": 30404.997, + "args": { + "External id": 499481,"Record function id": 0, "Ev Idx": 13661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287684.954, "dur": 7.280, + "args": { + "External id": 499482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287695.954, "dur": 1.285, + "args": { + "External id": 499483,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287698.971, "dur": 3.597, + "args": { + "External id": 499484,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287704.701, "dur": 0.830, + "args": { + "External id": 499485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287706.753, "dur": 0.893, + "args": { + "External id": 499486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287708.824, "dur": 1.053, + "args": { + "External id": 499487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287711.494, "dur": 1.230, + "args": { + "External id": 499488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287714.574, "dur": 2.078, + "args": { + "External id": 499489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287718.061, "dur": 0.821, + "args": { + "External id": 499490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141287722.309, "dur": 0.828, + "args": { + "External id": 499491,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141287742.259, "dur": 30188.104, + "args": { + "External id": 499492,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141287758.868, "dur": 30164.210, + "args": { + "External id": 499493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141287778.992, "dur": 16.128, + "args": { + "External id": 499494,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141287798.826, "dur": 30087.970, + "args": { + "External id": 499495,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141287801.613, "dur": 30084.573, + "args": { + "External id": 499496,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141287807.757, "dur": 5.185, + "args": { + "External id": 499497,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141287814.821, "dur": 30068.239, + "args": { + "External id": 499498,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141318138.989, "dur": 34.473, + "args": { + "External id": 499499,"Sequence number": 5516051, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13679 + } + }, + { + "ph": "s", "id": 183, "pid": 2338711, "tid": 2338711, "ts": 6274141318138.989, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141318159.813, "dur": 8.820, + "args": { + "External id": 499500,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141318163.994, "dur": 4.358, + "args": { + "External id": 499501,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141318241.140, "dur": 69.998, + "args": { + "External id": 499502,"Record function id": 0, "Ev Idx": 13682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141318312.820, "dur": 1136.169, + "args": { + "External id": 499503,"Record function id": 0, "Ev Idx": 13683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141318352.802, "dur": 1082.290, + "args": { + "External id": 499504,"Sequence number": 5516052, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13684 + } + }, + { + "ph": "s", "id": 182, "pid": 2338711, "tid": 2338711, "ts": 6274141318352.802, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141318422.067, "dur": 53.494, + "args": { + "External id": 499505,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141318488.170, "dur": 116.419, + "args": { + "External id": 499506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141318621.922, "dur": 41.460, + "args": { + "External id": 499507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141318670.577, "dur": 30.492, + "args": { + "External id": 499508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141318729.813, "dur": 28.646, + "args": { + "External id": 499509,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141318778.241, "dur": 20.404, + "args": { + "External id": 499510,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141318822.259, "dur": 135.064, + "args": { + "External id": 499511,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141318872.001, "dur": 11.756, + "args": { + "External id": 499512,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141318877.770, "dur": 5.247, + "args": { + "External id": 499513,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141318887.392, "dur": 4.160, + "args": { + "External id": 499514,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141318893.257, "dur": 1.346, + "args": { + "External id": 499515,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141318898.562, "dur": 5.190, + "args": { + "External id": 499516,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141318967.894, "dur": 47.398, + "args": { + "External id": 499517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141319045.716, "dur": 69.386, + "args": { + "External id": 499518,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141319128.466, "dur": 46.942, + "args": { + "External id": 499519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141319183.486, "dur": 37.230, + "args": { + "External id": 499520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141319243.223, "dur": 34.614, + "args": { + "External id": 499521,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141319284.024, "dur": 35.842, + "args": { + "External id": 499522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141319340.448, "dur": 18.931, + "args": { + "External id": 499523,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274141319517.741, "dur": 98.465, + "args": { + "External id": 499524,"Record function id": 0, "Ev Idx": 13704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141319697.315, "dur": 47.293, + "args": { + "External id": 499525,"Record function id": 0, "Ev Idx": 13705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274141319754.796, "dur": 31707.911, + "args": { + "External id": 499526,"Record function id": 0, "Ev Idx": 13706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274141319763.880, "dur": 1006.332, + "args": { + "External id": 499527,"Record function id": 0, "Ev Idx": 13707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141319846.405, "dur": 10.048, + "args": { + "External id": 499528,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141319869.568, "dur": 39.213, + "args": { + "External id": 499529,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319875.726, "dur": 2.347, + "args": { + "External id": 499530,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319882.575, "dur": 0.492, + "args": { + "External id": 499531,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319884.570, "dur": 0.476, + "args": { + "External id": 499532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319886.370, "dur": 0.619, + "args": { + "External id": 499533,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319890.764, "dur": 0.740, + "args": { + "External id": 499534,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319893.101, "dur": 0.415, + "args": { + "External id": 499535,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319894.906, "dur": 4.319, + "args": { + "External id": 499536,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319900.732, "dur": 0.396, + "args": { + "External id": 499537,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141319902.792, "dur": 0.292, + "args": { + "External id": 499538,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141319920.655, "dur": 55.053, + "args": { + "External id": 499539,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141320009.146, "dur": 168.880, + "args": { + "External id": 499540,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141320020.204, "dur": 4.384, + "args": { + "External id": 499541,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141320029.747, "dur": 10.900, + "args": { + "External id": 499542,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141320034.196, "dur": 6.030, + "args": { + "External id": 499543,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320038.313, "dur": 0.694, + "args": { + "External id": 499544,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141320048.055, "dur": 67.322, + "args": { + "External id": 499545,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320050.391, "dur": 37.563, + "args": { + "External id": 499546,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320091.439, "dur": 0.612, + "args": { + "External id": 499547,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320093.698, "dur": 0.458, + "args": { + "External id": 499548,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320097.373, "dur": 2.621, + "args": { + "External id": 499549,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320101.208, "dur": 0.764, + "args": { + "External id": 499550,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320103.425, "dur": 0.431, + "args": { + "External id": 499551,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320105.330, "dur": 0.348, + "args": { + "External id": 499552,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320107.027, "dur": 0.257, + "args": { + "External id": 499553,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141320108.578, "dur": 2.366, + "args": { + "External id": 499554,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141320130.551, "dur": 39.069, + "args": { + "External id": 499555,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141320246.988, "dur": 406.436, + "args": { + "External id": 499556,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141320282.598, "dur": 365.460, + "args": { + "External id": 499557,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141320293.650, "dur": 348.141, + "args": { + "External id": 499558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141320685.647, "dur": 2.718, + "args": { + "External id": 499559,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13739, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274141320793.051, "dur": 30469.356, + "args": { + "External id": 499560,"Record function id": 0, "Ev Idx": 13740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320895.049, "dur": 7.059, + "args": { + "External id": 499561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320905.776, "dur": 0.863, + "args": { + "External id": 499562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320908.247, "dur": 3.187, + "args": { + "External id": 499563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320912.925, "dur": 0.844, + "args": { + "External id": 499564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320914.988, "dur": 0.612, + "args": { + "External id": 499565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320916.694, "dur": 0.776, + "args": { + "External id": 499566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320919.092, "dur": 0.756, + "args": { + "External id": 499567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320921.450, "dur": 2.242, + "args": { + "External id": 499568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320925.134, "dur": 0.622, + "args": { + "External id": 499569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141320929.172, "dur": 0.460, + "args": { + "External id": 499570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141320949.257, "dur": 30269.113, + "args": { + "External id": 499571,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141320966.241, "dur": 30243.549, + "args": { + "External id": 499572,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141320983.878, "dur": 16.162, + "args": { + "External id": 499573,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141321003.641, "dur": 30168.522, + "args": { + "External id": 499574,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141321006.249, "dur": 30165.236, + "args": { + "External id": 499575,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141321013.074, "dur": 5.574, + "args": { + "External id": 499576,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141321020.555, "dur": 30147.448, + "args": { + "External id": 499577,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141351406.221, "dur": 31.650, + "args": { + "External id": 499578,"Sequence number": 5516053, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13758 + } + }, + { + "ph": "s", "id": 181, "pid": 2338711, "tid": 2338711, "ts": 6274141351406.221, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141351424.255, "dur": 8.790, + "args": { + "External id": 499579,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141351428.378, "dur": 4.518, + "args": { + "External id": 499580,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141351503.267, "dur": 71.307, + "args": { + "External id": 499581,"Record function id": 0, "Ev Idx": 13761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141351575.850, "dur": 1137.812, + "args": { + "External id": 499582,"Record function id": 0, "Ev Idx": 13762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141351628.748, "dur": 1071.129, + "args": { + "External id": 499583,"Sequence number": 5516054, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13763 + } + }, + { + "ph": "s", "id": 180, "pid": 2338711, "tid": 2338711, "ts": 6274141351628.748, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141351700.140, "dur": 51.354, + "args": { + "External id": 499584,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141351765.165, "dur": 103.697, + "args": { + "External id": 499585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141351880.032, "dur": 38.339, + "args": { + "External id": 499586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141351926.860, "dur": 30.695, + "args": { + "External id": 499587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141351984.029, "dur": 27.476, + "args": { + "External id": 499588,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141352031.624, "dur": 18.066, + "args": { + "External id": 499589,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141352109.384, "dur": 138.034, + "args": { + "External id": 499590,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141352162.511, "dur": 12.934, + "args": { + "External id": 499591,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141352168.399, "dur": 6.114, + "args": { + "External id": 499592,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141352178.337, "dur": 4.190, + "args": { + "External id": 499593,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141352184.036, "dur": 1.307, + "args": { + "External id": 499594,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141352187.966, "dur": 5.102, + "args": { + "External id": 499595,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141352258.061, "dur": 56.494, + "args": { + "External id": 499596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141352346.042, "dur": 31.837, + "args": { + "External id": 499597,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141352387.334, "dur": 42.206, + "args": { + "External id": 499598,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141352439.292, "dur": 35.421, + "args": { + "External id": 499599,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141352495.700, "dur": 24.086, + "args": { + "External id": 499600,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141352528.379, "dur": 36.608, + "args": { + "External id": 499601,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141352598.661, "dur": 21.423, + "args": { + "External id": 499602,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13782 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274141352782.536, "dur": 77.916, + "args": { + "External id": 499603,"Record function id": 0, "Ev Idx": 13783 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141352935.624, "dur": 47.766, + "args": { + "External id": 499604,"Record function id": 0, "Ev Idx": 13784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274141352993.159, "dur": 31271.322, + "args": { + "External id": 499605,"Record function id": 0, "Ev Idx": 13785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274141353002.841, "dur": 985.654, + "args": { + "External id": 499606,"Record function id": 0, "Ev Idx": 13786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141353123.603, "dur": 10.535, + "args": { + "External id": 499607,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141353147.800, "dur": 40.633, + "args": { + "External id": 499608,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353154.025, "dur": 2.207, + "args": { + "External id": 499609,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353161.041, "dur": 0.336, + "args": { + "External id": 499610,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353163.012, "dur": 0.527, + "args": { + "External id": 499611,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353165.185, "dur": 0.485, + "args": { + "External id": 499612,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353168.757, "dur": 0.775, + "args": { + "External id": 499613,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353171.585, "dur": 0.634, + "args": { + "External id": 499614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353173.763, "dur": 4.206, + "args": { + "External id": 499615,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353179.588, "dur": 0.622, + "args": { + "External id": 499616,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353181.965, "dur": 0.267, + "args": { + "External id": 499617,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141353200.209, "dur": 56.788, + "args": { + "External id": 499618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141353292.448, "dur": 128.822, + "args": { + "External id": 499619,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141353303.648, "dur": 4.697, + "args": { + "External id": 499620,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141353313.334, "dur": 11.352, + "args": { + "External id": 499621,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141353318.217, "dur": 6.059, + "args": { + "External id": 499622,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353322.473, "dur": 0.589, + "args": { + "External id": 499623,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141353331.866, "dur": 36.943, + "args": { + "External id": 499624,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353334.704, "dur": 2.247, + "args": { + "External id": 499625,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353338.817, "dur": 0.483, + "args": { + "External id": 499626,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353340.822, "dur": 0.428, + "args": { + "External id": 499627,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353344.968, "dur": 2.206, + "args": { + "External id": 499628,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353348.659, "dur": 0.566, + "args": { + "External id": 499629,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353350.563, "dur": 0.612, + "args": { + "External id": 499630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353353.674, "dur": 0.493, + "args": { + "External id": 499631,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353355.601, "dur": 0.328, + "args": { + "External id": 499632,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141353360.753, "dur": 1.314, + "args": { + "External id": 499633,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141353379.719, "dur": 33.432, + "args": { + "External id": 499634,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141353480.607, "dur": 402.797, + "args": { + "External id": 499635,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141353516.008, "dur": 361.614, + "args": { + "External id": 499636,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141353526.342, "dur": 344.347, + "args": { + "External id": 499637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141353911.270, "dur": 2.395, + "args": { + "External id": 499638,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13818, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274141354010.179, "dur": 30029.047, + "args": { + "External id": 499639,"Record function id": 0, "Ev Idx": 13819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354142.500, "dur": 6.895, + "args": { + "External id": 499640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354153.332, "dur": 0.959, + "args": { + "External id": 499641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354156.097, "dur": 2.764, + "args": { + "External id": 499642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354160.667, "dur": 0.824, + "args": { + "External id": 499643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354162.976, "dur": 0.851, + "args": { + "External id": 499644,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354164.990, "dur": 0.930, + "args": { + "External id": 499645,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354167.475, "dur": 0.899, + "args": { + "External id": 499646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354170.204, "dur": 2.036, + "args": { + "External id": 499647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354173.667, "dur": 0.990, + "args": { + "External id": 499648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141354178.084, "dur": 0.591, + "args": { + "External id": 499649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141354198.407, "dur": 29796.276, + "args": { + "External id": 499650,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141354222.480, "dur": 29764.104, + "args": { + "External id": 499651,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141354236.840, "dur": 17.105, + "args": { + "External id": 499652,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141354257.581, "dur": 29695.617, + "args": { + "External id": 499653,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141354260.527, "dur": 29691.951, + "args": { + "External id": 499654,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141354267.165, "dur": 5.265, + "args": { + "External id": 499655,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141354274.007, "dur": 29675.158, + "args": { + "External id": 499656,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141384200.721, "dur": 34.961, + "args": { + "External id": 499657,"Sequence number": 5516055, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13837 + } + }, + { + "ph": "s", "id": 179, "pid": 2338711, "tid": 2338711, "ts": 6274141384200.721, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141384218.626, "dur": 11.579, + "args": { + "External id": 499658,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141384225.079, "dur": 4.813, + "args": { + "External id": 499659,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141384304.315, "dur": 75.627, + "args": { + "External id": 499660,"Record function id": 0, "Ev Idx": 13840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141384381.419, "dur": 1140.195, + "args": { + "External id": 499661,"Record function id": 0, "Ev Idx": 13841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141384420.109, "dur": 1087.469, + "args": { + "External id": 499662,"Sequence number": 5516056, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13842 + } + }, + { + "ph": "s", "id": 178, "pid": 2338711, "tid": 2338711, "ts": 6274141384420.109, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141384485.814, "dur": 48.526, + "args": { + "External id": 499663,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141384547.158, "dur": 120.810, + "args": { + "External id": 499664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141384683.167, "dur": 39.768, + "args": { + "External id": 499665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141384731.925, "dur": 29.891, + "args": { + "External id": 499666,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141384792.027, "dur": 27.990, + "args": { + "External id": 499667,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141384842.734, "dur": 17.258, + "args": { + "External id": 499668,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141384884.006, "dur": 133.102, + "args": { + "External id": 499669,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141384935.227, "dur": 12.610, + "args": { + "External id": 499670,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141384941.506, "dur": 5.499, + "args": { + "External id": 499671,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141384950.820, "dur": 4.271, + "args": { + "External id": 499672,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141384956.514, "dur": 1.284, + "args": { + "External id": 499673,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141384960.396, "dur": 4.585, + "args": { + "External id": 499674,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141385027.714, "dur": 89.669, + "args": { + "External id": 499675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141385158.232, "dur": 30.727, + "args": { + "External id": 499676,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141385198.337, "dur": 49.687, + "args": { + "External id": 499677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141385258.860, "dur": 39.849, + "args": { + "External id": 499678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141385322.586, "dur": 26.673, + "args": { + "External id": 499679,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141385358.283, "dur": 36.096, + "args": { + "External id": 499680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141385412.328, "dur": 17.505, + "args": { + "External id": 499681,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13861 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274141385605.912, "dur": 81.512, + "args": { + "External id": 499682,"Record function id": 0, "Ev Idx": 13862 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141385766.833, "dur": 49.605, + "args": { + "External id": 499683,"Record function id": 0, "Ev Idx": 13863 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274141385825.807, "dur": 31481.550, + "args": { + "External id": 499684,"Record function id": 0, "Ev Idx": 13864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274141385833.493, "dur": 1010.566, + "args": { + "External id": 499685,"Record function id": 0, "Ev Idx": 13865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141385916.767, "dur": 9.721, + "args": { + "External id": 499686,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141385939.931, "dur": 38.511, + "args": { + "External id": 499687,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385945.847, "dur": 2.426, + "args": { + "External id": 499688,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385952.573, "dur": 0.462, + "args": { + "External id": 499689,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385954.653, "dur": 0.338, + "args": { + "External id": 499690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385956.446, "dur": 0.582, + "args": { + "External id": 499691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385959.824, "dur": 0.517, + "args": { + "External id": 499692,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385962.033, "dur": 0.549, + "args": { + "External id": 499693,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385964.118, "dur": 4.524, + "args": { + "External id": 499694,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385970.119, "dur": 0.290, + "args": { + "External id": 499695,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141385971.688, "dur": 0.299, + "args": { + "External id": 499696,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141385990.343, "dur": 55.458, + "args": { + "External id": 499697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141386118.724, "dur": 161.096, + "args": { + "External id": 499698,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141386131.194, "dur": 6.002, + "args": { + "External id": 499699,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141386142.811, "dur": 11.979, + "args": { + "External id": 499700,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141386147.583, "dur": 6.800, + "args": { + "External id": 499701,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386151.878, "dur": 0.906, + "args": { + "External id": 499702,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141386162.755, "dur": 32.252, + "args": { + "External id": 499703,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386165.773, "dur": 2.670, + "args": { + "External id": 499704,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386170.050, "dur": 0.253, + "args": { + "External id": 499705,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386172.070, "dur": 0.515, + "args": { + "External id": 499706,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386175.873, "dur": 2.482, + "args": { + "External id": 499707,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386180.283, "dur": 0.266, + "args": { + "External id": 499708,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386182.026, "dur": 0.385, + "args": { + "External id": 499709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386185.731, "dur": 0.251, + "args": { + "External id": 499710,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386187.245, "dur": 0.246, + "args": { + "External id": 499711,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141386189.047, "dur": 2.302, + "args": { + "External id": 499712,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141386232.023, "dur": 39.381, + "args": { + "External id": 499713,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141386338.477, "dur": 402.739, + "args": { + "External id": 499714,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141386370.927, "dur": 364.767, + "args": { + "External id": 499715,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13895, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141386383.411, "dur": 345.905, + "args": { + "External id": 499716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141386768.810, "dur": 2.385, + "args": { + "External id": 499717,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13897, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274141386866.988, "dur": 30241.626, + "args": { + "External id": 499718,"Record function id": 0, "Ev Idx": 13898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386969.005, "dur": 6.703, + "args": { + "External id": 499719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386979.329, "dur": 1.250, + "args": { + "External id": 499720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386982.468, "dur": 3.293, + "args": { + "External id": 499721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386987.471, "dur": 0.985, + "args": { + "External id": 499722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386989.752, "dur": 0.760, + "args": { + "External id": 499723,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386991.865, "dur": 0.860, + "args": { + "External id": 499724,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386994.284, "dur": 0.892, + "args": { + "External id": 499725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141386996.838, "dur": 2.148, + "args": { + "External id": 499726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141387000.570, "dur": 0.831, + "args": { + "External id": 499727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141387004.560, "dur": 0.714, + "args": { + "External id": 499728,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141387024.568, "dur": 30018.116, + "args": { + "External id": 499729,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141387041.174, "dur": 29993.652, + "args": { + "External id": 499730,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141387093.491, "dur": 18.353, + "args": { + "External id": 499731,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141387115.701, "dur": 29882.991, + "args": { + "External id": 499732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141387118.456, "dur": 29879.688, + "args": { + "External id": 499733,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141387125.030, "dur": 5.994, + "args": { + "External id": 499734,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141387132.751, "dur": 29862.125, + "args": { + "External id": 499735,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141417249.375, "dur": 32.662, + "args": { + "External id": 499736,"Sequence number": 5516057, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13916 + } + }, + { + "ph": "s", "id": 177, "pid": 2338711, "tid": 2338711, "ts": 6274141417249.375, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141417268.660, "dur": 8.734, + "args": { + "External id": 499737,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141417272.472, "dur": 4.701, + "args": { + "External id": 499738,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141417346.520, "dur": 71.540, + "args": { + "External id": 499739,"Record function id": 0, "Ev Idx": 13919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141417419.411, "dur": 1140.025, + "args": { + "External id": 499740,"Record function id": 0, "Ev Idx": 13920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141417463.954, "dur": 1080.988, + "args": { + "External id": 499741,"Sequence number": 5516058, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13921 + } + }, + { + "ph": "s", "id": 176, "pid": 2338711, "tid": 2338711, "ts": 6274141417463.954, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141417535.966, "dur": 67.972, + "args": { + "External id": 499742,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141417620.190, "dur": 107.931, + "args": { + "External id": 499743,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141417740.010, "dur": 39.747, + "args": { + "External id": 499744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141417788.378, "dur": 29.398, + "args": { + "External id": 499745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141417843.580, "dur": 30.224, + "args": { + "External id": 499746,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141417895.044, "dur": 17.002, + "args": { + "External id": 499747,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141417935.471, "dur": 169.579, + "args": { + "External id": 499748,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141417985.778, "dur": 12.753, + "args": { + "External id": 499749,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141417991.773, "dur": 5.713, + "args": { + "External id": 499750,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141418001.442, "dur": 3.707, + "args": { + "External id": 499751,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141418006.565, "dur": 0.932, + "args": { + "External id": 499752,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141418010.096, "dur": 4.350, + "args": { + "External id": 499753,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141418118.862, "dur": 58.192, + "args": { + "External id": 499754,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141418208.853, "dur": 31.204, + "args": { + "External id": 499755,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141418249.760, "dur": 41.379, + "args": { + "External id": 499756,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141418300.807, "dur": 35.134, + "args": { + "External id": 499757,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141418356.653, "dur": 26.913, + "args": { + "External id": 499758,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141418391.785, "dur": 34.883, + "args": { + "External id": 499759,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141418447.427, "dur": 17.757, + "args": { + "External id": 499760,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274141418644.785, "dur": 80.316, + "args": { + "External id": 499761,"Record function id": 0, "Ev Idx": 13941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141418802.918, "dur": 48.395, + "args": { + "External id": 499762,"Record function id": 0, "Ev Idx": 13942 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274141418860.198, "dur": 32642.756, + "args": { + "External id": 499763,"Record function id": 0, "Ev Idx": 13943 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274141418868.962, "dur": 995.920, + "args": { + "External id": 499764,"Record function id": 0, "Ev Idx": 13944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141418954.687, "dur": 9.292, + "args": { + "External id": 499765,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141418979.912, "dur": 40.672, + "args": { + "External id": 499766,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141418985.897, "dur": 2.556, + "args": { + "External id": 499767,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141418993.378, "dur": 0.583, + "args": { + "External id": 499768,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141418995.447, "dur": 0.514, + "args": { + "External id": 499769,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141418997.623, "dur": 0.607, + "args": { + "External id": 499770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419001.633, "dur": 0.249, + "args": { + "External id": 499771,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419003.772, "dur": 0.388, + "args": { + "External id": 499772,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419005.712, "dur": 4.964, + "args": { + "External id": 499773,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419012.085, "dur": 0.336, + "args": { + "External id": 499774,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419014.149, "dur": 0.343, + "args": { + "External id": 499775,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141419032.059, "dur": 90.664, + "args": { + "External id": 499776,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141419162.454, "dur": 130.544, + "args": { + "External id": 499777,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141419174.814, "dur": 6.060, + "args": { + "External id": 499778,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141419186.643, "dur": 11.481, + "args": { + "External id": 499779,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141419191.351, "dur": 6.380, + "args": { + "External id": 499780,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419195.508, "dur": 0.664, + "args": { + "External id": 499781,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141419206.622, "dur": 33.215, + "args": { + "External id": 499782,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419209.475, "dur": 2.485, + "args": { + "External id": 499783,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419213.688, "dur": 0.460, + "args": { + "External id": 499784,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419215.858, "dur": 0.596, + "args": { + "External id": 499785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419219.863, "dur": 2.636, + "args": { + "External id": 499786,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419223.928, "dur": 0.354, + "args": { + "External id": 499787,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419225.854, "dur": 0.421, + "args": { + "External id": 499788,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419229.639, "dur": 0.240, + "args": { + "External id": 499789,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419231.265, "dur": 0.330, + "args": { + "External id": 499790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141419233.018, "dur": 2.302, + "args": { + "External id": 499791,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141419252.122, "dur": 32.976, + "args": { + "External id": 499792,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141419349.557, "dur": 409.908, + "args": { + "External id": 499793,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141419388.600, "dur": 365.238, + "args": { + "External id": 499794,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13974, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141419399.172, "dur": 348.220, + "args": { + "External id": 499795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141419786.860, "dur": 2.234, + "args": { + "External id": 499796,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13976, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274141419887.808, "dur": 31415.950, + "args": { + "External id": 499797,"Record function id": 0, "Ev Idx": 13977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141419992.800, "dur": 6.446, + "args": { + "External id": 499798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420008.822, "dur": 1.210, + "args": { + "External id": 499799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420011.877, "dur": 3.170, + "args": { + "External id": 499800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420018.713, "dur": 1.011, + "args": { + "External id": 499801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420021.122, "dur": 1.079, + "args": { + "External id": 499802,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420023.643, "dur": 0.739, + "args": { + "External id": 499803,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420025.981, "dur": 1.020, + "args": { + "External id": 499804,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420030.669, "dur": 2.146, + "args": { + "External id": 499805,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420034.322, "dur": 0.760, + "args": { + "External id": 499806,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141420036.455, "dur": 0.525, + "args": { + "External id": 499807,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141420091.927, "dur": 31165.088, + "args": { + "External id": 499808,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141420110.277, "dur": 31138.735, + "args": { + "External id": 499809,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141420125.778, "dur": 17.632, + "args": { + "External id": 499810,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141420149.439, "dur": 31064.566, + "args": { + "External id": 499811,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141420152.016, "dur": 31061.497, + "args": { + "External id": 499812,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141420158.067, "dur": 5.946, + "args": { + "External id": 499813,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141420165.664, "dur": 31044.459, + "args": { + "External id": 499814,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141451446.133, "dur": 32.775, + "args": { + "External id": 499815,"Sequence number": 5516059, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13995 + } + }, + { + "ph": "s", "id": 175, "pid": 2338711, "tid": 2338711, "ts": 6274141451446.133, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141451465.359, "dur": 8.372, + "args": { + "External id": 499816,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141451469.064, "dur": 4.382, + "args": { + "External id": 499817,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141451543.471, "dur": 83.171, + "args": { + "External id": 499818,"Record function id": 0, "Ev Idx": 13998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141451629.300, "dur": 1130.762, + "args": { + "External id": 499819,"Record function id": 0, "Ev Idx": 13999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141451670.378, "dur": 1075.349, + "args": { + "External id": 499820,"Sequence number": 5516060, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14000 + } + }, + { + "ph": "s", "id": 174, "pid": 2338711, "tid": 2338711, "ts": 6274141451670.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141451741.264, "dur": 49.086, + "args": { + "External id": 499821,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141451803.427, "dur": 103.670, + "args": { + "External id": 499822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141451918.363, "dur": 38.448, + "args": { + "External id": 499823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141451965.796, "dur": 29.547, + "args": { + "External id": 499824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141452019.674, "dur": 28.002, + "args": { + "External id": 499825,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141452104.151, "dur": 21.181, + "args": { + "External id": 499826,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141452151.432, "dur": 134.874, + "args": { + "External id": 499827,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141452204.571, "dur": 12.445, + "args": { + "External id": 499828,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141452210.225, "dur": 5.991, + "args": { + "External id": 499829,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141452220.273, "dur": 3.705, + "args": { + "External id": 499830,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141452225.502, "dur": 1.360, + "args": { + "External id": 499831,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141452229.633, "dur": 5.035, + "args": { + "External id": 499832,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141452297.117, "dur": 53.171, + "args": { + "External id": 499833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141452381.908, "dur": 28.329, + "args": { + "External id": 499834,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141452419.305, "dur": 41.770, + "args": { + "External id": 499835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141452471.010, "dur": 34.752, + "args": { + "External id": 499836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141452537.155, "dur": 29.070, + "args": { + "External id": 499837,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141452574.352, "dur": 52.817, + "args": { + "External id": 499838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141452652.336, "dur": 21.449, + "args": { + "External id": 499839,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14019 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274141452828.750, "dur": 80.823, + "args": { + "External id": 499840,"Record function id": 0, "Ev Idx": 14020 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274141452984.974, "dur": 46.273, + "args": { + "External id": 499841,"Record function id": 0, "Ev Idx": 14021 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274141453041.371, "dur": 31447.163, + "args": { + "External id": 499842,"Record function id": 0, "Ev Idx": 14022 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274141453050.586, "dur": 1058.785, + "args": { + "External id": 499843,"Record function id": 0, "Ev Idx": 14023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141453167.823, "dur": 12.715, + "args": { + "External id": 499844,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141453194.939, "dur": 43.778, + "args": { + "External id": 499845,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453201.353, "dur": 2.364, + "args": { + "External id": 499846,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453208.579, "dur": 0.553, + "args": { + "External id": 499847,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453210.787, "dur": 0.548, + "args": { + "External id": 499848,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453212.922, "dur": 0.721, + "args": { + "External id": 499849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453217.650, "dur": 0.405, + "args": { + "External id": 499850,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453219.561, "dur": 0.397, + "args": { + "External id": 499851,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453221.574, "dur": 3.425, + "args": { + "External id": 499852,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453226.512, "dur": 0.551, + "args": { + "External id": 499853,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453228.733, "dur": 0.488, + "args": { + "External id": 499854,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141453251.390, "dur": 60.839, + "args": { + "External id": 499855,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274141453346.924, "dur": 126.358, + "args": { + "External id": 499856,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 14036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141453361.168, "dur": 4.437, + "args": { + "External id": 499857,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274141453370.598, "dur": 11.155, + "args": { + "External id": 499858,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141453375.273, "dur": 6.057, + "args": { + "External id": 499859,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 14039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453379.443, "dur": 0.725, + "args": { + "External id": 499860,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274141453388.789, "dur": 32.210, + "args": { + "External id": 499861,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453391.593, "dur": 2.600, + "args": { + "External id": 499862,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453395.762, "dur": 0.660, + "args": { + "External id": 499863,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453398.195, "dur": 0.430, + "args": { + "External id": 499864,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453401.870, "dur": 2.762, + "args": { + "External id": 499865,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453405.733, "dur": 0.529, + "args": { + "External id": 499866,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453407.734, "dur": 0.328, + "args": { + "External id": 499867,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453410.943, "dur": 0.419, + "args": { + "External id": 499868,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453412.668, "dur": 0.418, + "args": { + "External id": 499869,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141453414.566, "dur": 2.170, + "args": { + "External id": 499870,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141453434.566, "dur": 31.198, + "args": { + "External id": 499871,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274141453527.085, "dur": 437.521, + "args": { + "External id": 499872,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 14052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141453559.108, "dur": 399.535, + "args": { + "External id": 499873,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 14053, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274141453569.476, "dur": 381.037, + "args": { + "External id": 499874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 14054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274141453992.907, "dur": 2.660, + "args": { + "External id": 499875,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 14055, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274141454134.991, "dur": 30159.374, + "args": { + "External id": 499876,"Record function id": 0, "Ev Idx": 14056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454237.569, "dur": 7.029, + "args": { + "External id": 499877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 14057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454248.410, "dur": 1.095, + "args": { + "External id": 499878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454254.117, "dur": 3.112, + "args": { + "External id": 499879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454258.794, "dur": 0.855, + "args": { + "External id": 499880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454261.124, "dur": 0.851, + "args": { + "External id": 499881,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454263.389, "dur": 0.678, + "args": { + "External id": 499882,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454267.639, "dur": 0.795, + "args": { + "External id": 499883,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454269.751, "dur": 1.934, + "args": { + "External id": 499884,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454273.071, "dur": 1.051, + "args": { + "External id": 499885,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141454275.932, "dur": 1.137, + "args": { + "External id": 499886,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141454297.738, "dur": 29950.243, + "args": { + "External id": 499887,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141454317.683, "dur": 29922.183, + "args": { + "External id": 499888,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141454340.094, "dur": 16.797, + "args": { + "External id": 499889,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141454360.376, "dur": 29843.197, + "args": { + "External id": 499890,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 14070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141454363.500, "dur": 29839.461, + "args": { + "External id": 499891,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 14071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141454370.514, "dur": 5.413, + "args": { + "External id": 499892,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141454377.854, "dur": 29821.862, + "args": { + "External id": 499893,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 14073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141484433.620, "dur": 30.211, + "args": { + "External id": 499894,"Sequence number": 5516061, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 14074 + } + }, + { + "ph": "s", "id": 173, "pid": 2338711, "tid": 2338711, "ts": 6274141484433.620, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141484450.483, "dur": 8.337, + "args": { + "External id": 499895,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 14075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141484454.298, "dur": 4.267, + "args": { + "External id": 499896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141484529.763, "dur": 85.016, + "args": { + "External id": 499897,"Record function id": 0, "Ev Idx": 14077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274141484617.430, "dur": 1149.195, + "args": { + "External id": 499898,"Record function id": 0, "Ev Idx": 14078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141484660.732, "dur": 1091.872, + "args": { + "External id": 499899,"Sequence number": 5516062, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14079 + } + }, + { + "ph": "s", "id": 172, "pid": 2338711, "tid": 2338711, "ts": 6274141484660.732, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141484729.645, "dur": 53.116, + "args": { + "External id": 499900,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141484795.726, "dur": 94.076, + "args": { + "External id": 499901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141484901.981, "dur": 36.391, + "args": { + "External id": 499902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141484947.664, "dur": 31.206, + "args": { + "External id": 499903,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141485004.873, "dur": 29.598, + "args": { + "External id": 499904,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274141485102.155, "dur": 24.551, + "args": { + "External id": 499905,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141485153.821, "dur": 136.185, + "args": { + "External id": 499906,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141485207.636, "dur": 12.461, + "args": { + "External id": 499907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141485213.149, "dur": 6.214, + "args": { + "External id": 499908,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141485222.868, "dur": 4.647, + "args": { + "External id": 499909,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141485228.746, "dur": 1.126, + "args": { + "External id": 499910,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141485232.308, "dur": 4.477, + "args": { + "External id": 499911,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141485301.391, "dur": 55.760, + "args": { + "External id": 499912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274141485388.320, "dur": 30.692, + "args": { + "External id": 499913,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141485427.939, "dur": 42.735, + "args": { + "External id": 499914,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141485481.619, "dur": 35.770, + "args": { + "External id": 499915,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141485541.055, "dur": 27.145, + "args": { + "External id": 499916,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141485576.217, "dur": 54.059, + "args": { + "External id": 499917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274141485652.190, "dur": 22.465, + "args": { + "External id": 499918,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14098 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274141485834.740, "dur": 37.657, + "args": { + "External id": 499919,"Record function id": 0, "Ev Idx": 14099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141486006.603, "dur": 334.118, + "args": { + "External id": 499920,"Sequence number": 5516063, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14100 + } + }, + { + "ph": "s", "id": 171, "pid": 2338711, "tid": 2338711, "ts": 6274141486006.603, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486036.398, "dur": 7.749, + "args": { + "External id": 499921,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486038.703, "dur": 5.254, + "args": { + "External id": 499922,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141486090.485, "dur": 15.915, + "args": { + "External id": 499923,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141486094.484, "dur": 11.125, + "args": { + "External id": 499924,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141486118.167, "dur": 4.973, + "args": { + "External id": 499925,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486317.688, "dur": 7.984, + "args": { + "External id": 499926,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486321.583, "dur": 3.783, + "args": { + "External id": 499927,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141486369.825, "dur": 146.309, + "args": { + "External id": 499928,"Sequence number": 5516064, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141486372.272, "dur": 14.895, + "args": { + "External id": 499929,"Sequence number": 5516064, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14109 + } + }, + { + "ph": "s", "id": 170, "pid": 2338711, "tid": 2338711, "ts": 6274141486372.272, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141486377.530, "dur": 7.787, + "args": { + "External id": 499930,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141486382.678, "dur": 2.295, + "args": { + "External id": 499931,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141486390.322, "dur": 125.448, + "args": { + "External id": 499932,"Sequence number": 5516065, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486393.166, "dur": 4.811, + "args": { + "External id": 499933,"Sequence number": 5516065, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486394.191, "dur": 3.610, + "args": { + "External id": 499934,"Sequence number": 5516065, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14114 + } + }, + { + "ph": "s", "id": 169, "pid": 2338711, "tid": 2338711, "ts": 6274141486394.191, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141486399.538, "dur": 100.858, + "args": { + "External id": 499935,"Sequence number": 5516066, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14115 + } + }, + { + "ph": "s", "id": 168, "pid": 2338711, "tid": 2338711, "ts": 6274141486399.538, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486506.736, "dur": 7.889, + "args": { + "External id": 499936,"Sequence number": 5516067, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14116 + } + }, + { + "ph": "s", "id": 167, "pid": 2338711, "tid": 2338711, "ts": 6274141486506.736, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141486527.378, "dur": 88.562, + "args": { + "External id": 499937,"Sequence number": 5516068, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141486528.336, "dur": 9.631, + "args": { + "External id": 499938,"Sequence number": 5516068, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14118 + } + }, + { + "ph": "s", "id": 166, "pid": 2338711, "tid": 2338711, "ts": 6274141486528.336, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141486530.954, "dur": 5.833, + "args": { + "External id": 499939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141486535.614, "dur": 0.944, + "args": { + "External id": 499940,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141486538.529, "dur": 77.139, + "args": { + "External id": 499941,"Sequence number": 5516069, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486539.613, "dur": 5.396, + "args": { + "External id": 499942,"Sequence number": 5516069, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486540.514, "dur": 4.365, + "args": { + "External id": 499943,"Sequence number": 5516069, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14123 + } + }, + { + "ph": "s", "id": 165, "pid": 2338711, "tid": 2338711, "ts": 6274141486540.514, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141486545.535, "dur": 61.326, + "args": { + "External id": 499944,"Sequence number": 5516070, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14124 + } + }, + { + "ph": "s", "id": 164, "pid": 2338711, "tid": 2338711, "ts": 6274141486545.535, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486609.888, "dur": 5.091, + "args": { + "External id": 499945,"Sequence number": 5516071, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14125 + } + }, + { + "ph": "s", "id": 163, "pid": 2338711, "tid": 2338711, "ts": 6274141486609.888, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141486625.235, "dur": 70.123, + "args": { + "External id": 499946,"Sequence number": 5516072, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141486626.076, "dur": 6.300, + "args": { + "External id": 499947,"Sequence number": 5516072, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14127 + } + }, + { + "ph": "s", "id": 162, "pid": 2338711, "tid": 2338711, "ts": 6274141486626.076, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141486628.145, "dur": 3.166, + "args": { + "External id": 499948,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141486630.076, "dur": 1.072, + "args": { + "External id": 499949,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141486635.598, "dur": 59.575, + "args": { + "External id": 499950,"Sequence number": 5516073, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486636.792, "dur": 5.288, + "args": { + "External id": 499951,"Sequence number": 5516073, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486637.945, "dur": 3.981, + "args": { + "External id": 499952,"Sequence number": 5516073, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14132 + } + }, + { + "ph": "s", "id": 161, "pid": 2338711, "tid": 2338711, "ts": 6274141486637.945, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141486642.862, "dur": 43.928, + "args": { + "External id": 499953,"Sequence number": 5516074, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14133 + } + }, + { + "ph": "s", "id": 160, "pid": 2338711, "tid": 2338711, "ts": 6274141486642.862, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486688.839, "dur": 5.936, + "args": { + "External id": 499954,"Sequence number": 5516075, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14134 + } + }, + { + "ph": "s", "id": 159, "pid": 2338711, "tid": 2338711, "ts": 6274141486688.839, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486718.462, "dur": 3.804, + "args": { + "External id": 499955,"Sequence number": 5516076, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486719.471, "dur": 2.678, + "args": { + "External id": 499956,"Sequence number": 5516076, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14136 + } + }, + { + "ph": "s", "id": 158, "pid": 2338711, "tid": 2338711, "ts": 6274141486719.471, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486729.689, "dur": 3.033, + "args": { + "External id": 499957,"Sequence number": 5516077, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486730.603, "dur": 1.995, + "args": { + "External id": 499958,"Sequence number": 5516077, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14138 + } + }, + { + "ph": "s", "id": 157, "pid": 2338711, "tid": 2338711, "ts": 6274141486730.603, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141486739.259, "dur": 4.640, + "args": { + "External id": 499959,"Sequence number": 5516078, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141486740.442, "dur": 3.314, + "args": { + "External id": 499960,"Sequence number": 5516078, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14140 + } + }, + { + "ph": "s", "id": 156, "pid": 2338711, "tid": 2338711, "ts": 6274141486740.442, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141486781.010, "dur": 184.985, + "args": { + "External id": 499961,"Sequence number": 5516079, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14141 + } + }, + { + "ph": "s", "id": 155, "pid": 2338711, "tid": 2338711, "ts": 6274141486781.010, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141486806.662, "dur": 9.223, + "args": { + "External id": 499962,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141486810.150, "dur": 5.202, + "args": { + "External id": 499963,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141486980.153, "dur": 167.539, + "args": { + "External id": 499964,"Sequence number": 5516080, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14144 + } + }, + { + "ph": "s", "id": 154, "pid": 2338711, "tid": 2338711, "ts": 6274141486980.153, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141486995.598, "dur": 6.515, + "args": { + "External id": 499965,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141486998.122, "dur": 3.631, + "args": { + "External id": 499966,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274141487182.337, "dur": 196.104, + "args": { + "External id": 499967,"Sequence number": 5516081, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14147 + } + }, + { + "ph": "s", "id": 153, "pid": 2338711, "tid": 2338711, "ts": 6274141487182.337, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141487212.481, "dur": 135.914, + "args": { + "External id": 499968,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141487266.699, "dur": 8.495, + "args": { + "External id": 499969,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141487269.477, "dur": 5.380, + "args": { + "External id": 499970,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141487281.349, "dur": 4.081, + "args": { + "External id": 499971,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141487289.386, "dur": 1.089, + "args": { + "External id": 499972,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141487292.688, "dur": 3.016, + "args": { + "External id": 499973,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274141487363.823, "dur": 5.568, + "args": { + "External id": 499974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487383.927, "dur": 6.192, + "args": { + "External id": 499975,"Sequence number": 5516082, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487385.391, "dur": 4.550, + "args": { + "External id": 499976,"Sequence number": 5516082, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14156 + } + }, + { + "ph": "s", "id": 152, "pid": 2338711, "tid": 2338711, "ts": 6274141487385.391, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141487405.043, "dur": 112.994, + "args": { + "External id": 499977,"Sequence number": 5516083, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141487406.301, "dur": 8.895, + "args": { + "External id": 499978,"Sequence number": 5516083, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14158 + } + }, + { + "ph": "s", "id": 151, "pid": 2338711, "tid": 2338711, "ts": 6274141487406.301, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141487409.190, "dur": 4.958, + "args": { + "External id": 499979,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141487411.972, "dur": 1.876, + "args": { + "External id": 499980,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141487416.713, "dur": 101.002, + "args": { + "External id": 499981,"Sequence number": 5516084, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487420.357, "dur": 3.901, + "args": { + "External id": 499982,"Sequence number": 5516084, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487421.376, "dur": 2.759, + "args": { + "External id": 499983,"Sequence number": 5516084, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14163 + } + }, + { + "ph": "s", "id": 150, "pid": 2338711, "tid": 2338711, "ts": 6274141487421.376, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141487425.241, "dur": 86.072, + "args": { + "External id": 499984,"Sequence number": 5516085, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14164 + } + }, + { + "ph": "s", "id": 149, "pid": 2338711, "tid": 2338711, "ts": 6274141487425.241, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487513.836, "dur": 3.195, + "args": { + "External id": 499985,"Sequence number": 5516086, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14165 + } + }, + { + "ph": "s", "id": 148, "pid": 2338711, "tid": 2338711, "ts": 6274141487513.836, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141487553.109, "dur": 250.924, + "args": { + "External id": 499986,"Sequence number": 5516087, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14166 + } + }, + { + "ph": "s", "id": 147, "pid": 2338711, "tid": 2338711, "ts": 6274141487553.109, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487573.093, "dur": 2.572, + "args": { + "External id": 499987,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487573.875, "dur": 1.669, + "args": { + "External id": 499988,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141487581.023, "dur": 25.015, + "args": { + "External id": 499989,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487582.179, "dur": 23.698, + "args": { + "External id": 499990,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487601.789, "dur": 3.773, + "args": { + "External id": 499991,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141487615.602, "dur": 7.869, + "args": { + "External id": 499992,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141487617.981, "dur": 5.118, + "args": { + "External id": 499993,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141487629.989, "dur": 2.564, + "args": { + "External id": 499994,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141487638.457, "dur": 2.688, + "args": { + "External id": 499995,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487778.006, "dur": 3.392, + "args": { + "External id": 499996,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487779.182, "dur": 2.008, + "args": { + "External id": 499997,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487786.419, "dur": 2.341, + "args": { + "External id": 499998,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487787.466, "dur": 1.201, + "args": { + "External id": 499999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141487823.190, "dur": 104.542, + "args": { + "External id": 500000,"Sequence number": 5516088, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141487824.542, "dur": 8.217, + "args": { + "External id": 500001,"Sequence number": 5516088, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14181 + } + }, + { + "ph": "s", "id": 146, "pid": 2338711, "tid": 2338711, "ts": 6274141487824.542, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141487827.064, "dur": 4.407, + "args": { + "External id": 500002,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141487829.508, "dur": 1.683, + "args": { + "External id": 500003,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141487836.364, "dur": 91.070, + "args": { + "External id": 500004,"Sequence number": 5516089, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487837.904, "dur": 4.265, + "args": { + "External id": 500005,"Sequence number": 5516089, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487838.893, "dur": 3.144, + "args": { + "External id": 500006,"Sequence number": 5516089, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14186 + } + }, + { + "ph": "s", "id": 145, "pid": 2338711, "tid": 2338711, "ts": 6274141487838.893, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141487843.054, "dur": 72.023, + "args": { + "External id": 500007,"Sequence number": 5516090, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14187 + } + }, + { + "ph": "s", "id": 144, "pid": 2338711, "tid": 2338711, "ts": 6274141487843.054, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487917.521, "dur": 9.298, + "args": { + "External id": 500008,"Sequence number": 5516091, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14188 + } + }, + { + "ph": "s", "id": 143, "pid": 2338711, "tid": 2338711, "ts": 6274141487917.521, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141487936.066, "dur": 68.625, + "args": { + "External id": 500009,"Sequence number": 5516092, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141487936.675, "dur": 5.588, + "args": { + "External id": 500010,"Sequence number": 5516092, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14190 + } + }, + { + "ph": "s", "id": 142, "pid": 2338711, "tid": 2338711, "ts": 6274141487936.675, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141487938.337, "dur": 2.758, + "args": { + "External id": 500011,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141487940.002, "dur": 0.930, + "args": { + "External id": 500012,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141487942.922, "dur": 61.516, + "args": { + "External id": 500013,"Sequence number": 5516093, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141487946.415, "dur": 5.121, + "args": { + "External id": 500014,"Sequence number": 5516093, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141487947.449, "dur": 3.947, + "args": { + "External id": 500015,"Sequence number": 5516093, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14195 + } + }, + { + "ph": "s", "id": 141, "pid": 2338711, "tid": 2338711, "ts": 6274141487947.449, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141487952.346, "dur": 47.897, + "args": { + "External id": 500016,"Sequence number": 5516094, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14196 + } + }, + { + "ph": "s", "id": 140, "pid": 2338711, "tid": 2338711, "ts": 6274141487952.346, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488002.283, "dur": 1.770, + "args": { + "External id": 500017,"Sequence number": 5516095, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14197 + } + }, + { + "ph": "s", "id": 139, "pid": 2338711, "tid": 2338711, "ts": 6274141488002.283, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141488029.441, "dur": 213.873, + "args": { + "External id": 500018,"Sequence number": 5516096, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14198 + } + }, + { + "ph": "s", "id": 138, "pid": 2338711, "tid": 2338711, "ts": 6274141488029.441, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141488108.635, "dur": 5.348, + "args": { + "External id": 500019,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141488151.680, "dur": 74.959, + "args": { + "External id": 500020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141488152.507, "dur": 8.748, + "args": { + "External id": 500021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141488154.135, "dur": 5.952, + "args": { + "External id": 500022,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141488158.624, "dur": 1.276, + "args": { + "External id": 500023,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141488164.062, "dur": 62.256, + "args": { + "External id": 500024,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488165.909, "dur": 2.871, + "args": { + "External id": 500025,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488167.078, "dur": 1.606, + "args": { + "External id": 500026,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141488169.449, "dur": 52.410, + "args": { + "External id": 500027,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488224.410, "dur": 1.177, + "args": { + "External id": 500028,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274141488254.127, "dur": 27.664, + "args": { + "External id": 500029,"Sequence number": 5516097, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14209 + } + }, + { + "ph": "s", "id": 137, "pid": 2338711, "tid": 2338711, "ts": 6274141488254.127, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141488318.508, "dur": 190.401, + "args": { + "External id": 500030,"Sequence number": 5516098, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14210 + } + }, + { + "ph": "s", "id": 136, "pid": 2338711, "tid": 2338711, "ts": 6274141488318.508, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488338.808, "dur": 3.621, + "args": { + "External id": 500031,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488339.746, "dur": 2.256, + "args": { + "External id": 500032,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141488350.662, "dur": 7.693, + "args": { + "External id": 500033,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141488353.451, "dur": 4.534, + "args": { + "External id": 500034,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141488364.614, "dur": 3.294, + "args": { + "External id": 500035,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488491.885, "dur": 3.212, + "args": { + "External id": 500036,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488492.903, "dur": 1.843, + "args": { + "External id": 500037,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141488528.190, "dur": 115.210, + "args": { + "External id": 500038,"Sequence number": 5516099, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141488529.006, "dur": 8.124, + "args": { + "External id": 500039,"Sequence number": 5516099, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14219 + } + }, + { + "ph": "s", "id": 135, "pid": 2338711, "tid": 2338711, "ts": 6274141488529.006, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141488531.685, "dur": 4.088, + "args": { + "External id": 500040,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141488534.179, "dur": 1.364, + "args": { + "External id": 500041,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141488538.070, "dur": 105.014, + "args": { + "External id": 500042,"Sequence number": 5516100, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488541.843, "dur": 5.981, + "args": { + "External id": 500043,"Sequence number": 5516100, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488542.350, "dur": 5.310, + "args": { + "External id": 500044,"Sequence number": 5516100, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14224 + } + }, + { + "ph": "s", "id": 134, "pid": 2338711, "tid": 2338711, "ts": 6274141488542.350, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141488548.631, "dur": 83.994, + "args": { + "External id": 500045,"Sequence number": 5516101, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14225 + } + }, + { + "ph": "s", "id": 133, "pid": 2338711, "tid": 2338711, "ts": 6274141488548.631, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488636.566, "dur": 5.812, + "args": { + "External id": 500046,"Sequence number": 5516102, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14226 + } + }, + { + "ph": "s", "id": 132, "pid": 2338711, "tid": 2338711, "ts": 6274141488636.566, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141488653.287, "dur": 76.048, + "args": { + "External id": 500047,"Sequence number": 5516103, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141488656.004, "dur": 8.483, + "args": { + "External id": 500048,"Sequence number": 5516103, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14228 + } + }, + { + "ph": "s", "id": 131, "pid": 2338711, "tid": 2338711, "ts": 6274141488656.004, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141488660.374, "dur": 3.044, + "args": { + "External id": 500049,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141488662.356, "dur": 0.864, + "args": { + "External id": 500050,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141488665.158, "dur": 63.984, + "args": { + "External id": 500051,"Sequence number": 5516104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488666.157, "dur": 5.353, + "args": { + "External id": 500052,"Sequence number": 5516104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488669.212, "dur": 2.160, + "args": { + "External id": 500053,"Sequence number": 5516104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14233 + } + }, + { + "ph": "s", "id": 130, "pid": 2338711, "tid": 2338711, "ts": 6274141488669.212, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141488672.123, "dur": 49.215, + "args": { + "External id": 500054,"Sequence number": 5516105, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14234 + } + }, + { + "ph": "s", "id": 129, "pid": 2338711, "tid": 2338711, "ts": 6274141488672.123, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488722.936, "dur": 5.640, + "args": { + "External id": 500055,"Sequence number": 5516106, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14235 + } + }, + { + "ph": "s", "id": 128, "pid": 2338711, "tid": 2338711, "ts": 6274141488722.936, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141488736.369, "dur": 59.941, + "args": { + "External id": 500056,"Sequence number": 5516107, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141488737.072, "dur": 8.633, + "args": { + "External id": 500057,"Sequence number": 5516107, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14237 + } + }, + { + "ph": "s", "id": 127, "pid": 2338711, "tid": 2338711, "ts": 6274141488737.072, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141488741.232, "dur": 3.104, + "args": { + "External id": 500058,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141488743.140, "dur": 0.952, + "args": { + "External id": 500059,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141488746.243, "dur": 49.883, + "args": { + "External id": 500060,"Sequence number": 5516108, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488747.516, "dur": 8.027, + "args": { + "External id": 500061,"Sequence number": 5516108, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488748.472, "dur": 6.946, + "args": { + "External id": 500062,"Sequence number": 5516108, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14242 + } + }, + { + "ph": "s", "id": 126, "pid": 2338711, "tid": 2338711, "ts": 6274141488748.472, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141488756.238, "dur": 36.207, + "args": { + "External id": 500063,"Sequence number": 5516109, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14243 + } + }, + { + "ph": "s", "id": 125, "pid": 2338711, "tid": 2338711, "ts": 6274141488756.238, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488794.108, "dur": 1.631, + "args": { + "External id": 500064,"Sequence number": 5516110, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14244 + } + }, + { + "ph": "s", "id": 124, "pid": 2338711, "tid": 2338711, "ts": 6274141488794.108, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488813.838, "dur": 4.345, + "args": { + "External id": 500065,"Sequence number": 5516111, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488815.346, "dur": 2.720, + "args": { + "External id": 500066,"Sequence number": 5516111, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14246 + } + }, + { + "ph": "s", "id": 123, "pid": 2338711, "tid": 2338711, "ts": 6274141488815.346, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488827.682, "dur": 4.717, + "args": { + "External id": 500067,"Sequence number": 5516112, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488828.553, "dur": 3.706, + "args": { + "External id": 500068,"Sequence number": 5516112, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14248 + } + }, + { + "ph": "s", "id": 122, "pid": 2338711, "tid": 2338711, "ts": 6274141488828.553, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141488836.651, "dur": 2.971, + "args": { + "External id": 500069,"Sequence number": 5516113, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141488837.515, "dur": 1.982, + "args": { + "External id": 500070,"Sequence number": 5516113, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14250 + } + }, + { + "ph": "s", "id": 121, "pid": 2338711, "tid": 2338711, "ts": 6274141488837.515, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141488869.099, "dur": 156.333, + "args": { + "External id": 500071,"Sequence number": 5516114, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14251 + } + }, + { + "ph": "s", "id": 120, "pid": 2338711, "tid": 2338711, "ts": 6274141488869.099, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141488889.500, "dur": 8.974, + "args": { + "External id": 500072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141488892.796, "dur": 5.255, + "args": { + "External id": 500073,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141489037.787, "dur": 156.088, + "args": { + "External id": 500074,"Sequence number": 5516115, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14254 + } + }, + { + "ph": "s", "id": 119, "pid": 2338711, "tid": 2338711, "ts": 6274141489037.787, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141489050.846, "dur": 44.544, + "args": { + "External id": 500075,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141489089.284, "dur": 5.568, + "args": { + "External id": 500076,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274141489225.917, "dur": 185.651, + "args": { + "External id": 500077,"Sequence number": 5516116, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14257 + } + }, + { + "ph": "s", "id": 118, "pid": 2338711, "tid": 2338711, "ts": 6274141489225.917, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141489257.301, "dur": 124.909, + "args": { + "External id": 500078,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141489306.359, "dur": 9.738, + "args": { + "External id": 500079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141489310.076, "dur": 5.541, + "args": { + "External id": 500080,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141489318.829, "dur": 3.635, + "args": { + "External id": 500081,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141489323.905, "dur": 1.307, + "args": { + "External id": 500082,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141489327.536, "dur": 3.167, + "args": { + "External id": 500083,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274141489395.306, "dur": 4.611, + "args": { + "External id": 500084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489416.556, "dur": 5.641, + "args": { + "External id": 500085,"Sequence number": 5516117, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489418.018, "dur": 4.046, + "args": { + "External id": 500086,"Sequence number": 5516117, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14266 + } + }, + { + "ph": "s", "id": 117, "pid": 2338711, "tid": 2338711, "ts": 6274141489418.018, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141489433.637, "dur": 107.061, + "args": { + "External id": 500087,"Sequence number": 5516118, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141489434.577, "dur": 8.484, + "args": { + "External id": 500088,"Sequence number": 5516118, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14268 + } + }, + { + "ph": "s", "id": 116, "pid": 2338711, "tid": 2338711, "ts": 6274141489434.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141489437.482, "dur": 4.602, + "args": { + "External id": 500089,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141489440.295, "dur": 1.510, + "args": { + "External id": 500090,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141489446.328, "dur": 94.100, + "args": { + "External id": 500091,"Sequence number": 5516119, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489448.250, "dur": 3.097, + "args": { + "External id": 500092,"Sequence number": 5516119, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489449.184, "dur": 2.047, + "args": { + "External id": 500093,"Sequence number": 5516119, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14273 + } + }, + { + "ph": "s", "id": 115, "pid": 2338711, "tid": 2338711, "ts": 6274141489449.184, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141489452.196, "dur": 78.129, + "args": { + "External id": 500094,"Sequence number": 5516120, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14274 + } + }, + { + "ph": "s", "id": 114, "pid": 2338711, "tid": 2338711, "ts": 6274141489452.196, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489532.821, "dur": 6.936, + "args": { + "External id": 500095,"Sequence number": 5516121, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14275 + } + }, + { + "ph": "s", "id": 113, "pid": 2338711, "tid": 2338711, "ts": 6274141489532.821, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141489601.602, "dur": 223.675, + "args": { + "External id": 500096,"Sequence number": 5516122, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14276 + } + }, + { + "ph": "s", "id": 112, "pid": 2338711, "tid": 2338711, "ts": 6274141489601.602, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489623.066, "dur": 6.518, + "args": { + "External id": 500097,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489624.270, "dur": 4.926, + "args": { + "External id": 500098,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141489634.548, "dur": 5.885, + "args": { + "External id": 500099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489637.926, "dur": 2.407, + "args": { + "External id": 500100,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489638.932, "dur": 1.201, + "args": { + "External id": 500101,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141489647.972, "dur": 8.023, + "args": { + "External id": 500102,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141489650.108, "dur": 5.497, + "args": { + "External id": 500103,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141489662.256, "dur": 2.770, + "args": { + "External id": 500104,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141489668.731, "dur": 3.819, + "args": { + "External id": 500105,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489801.612, "dur": 3.629, + "args": { + "External id": 500106,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489802.566, "dur": 2.428, + "args": { + "External id": 500107,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489808.075, "dur": 2.140, + "args": { + "External id": 500108,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489809.062, "dur": 1.069, + "args": { + "External id": 500109,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141489844.548, "dur": 102.889, + "args": { + "External id": 500110,"Sequence number": 5516123, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141489845.688, "dur": 10.040, + "args": { + "External id": 500111,"Sequence number": 5516123, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14291 + } + }, + { + "ph": "s", "id": 111, "pid": 2338711, "tid": 2338711, "ts": 6274141489845.688, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141489848.244, "dur": 6.293, + "args": { + "External id": 500112,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141489852.581, "dur": 1.664, + "args": { + "External id": 500113,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141489856.576, "dur": 90.600, + "args": { + "External id": 500114,"Sequence number": 5516124, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489858.126, "dur": 6.158, + "args": { + "External id": 500115,"Sequence number": 5516124, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489858.949, "dur": 5.233, + "args": { + "External id": 500116,"Sequence number": 5516124, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14296 + } + }, + { + "ph": "s", "id": 110, "pid": 2338711, "tid": 2338711, "ts": 6274141489858.949, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141489867.183, "dur": 74.925, + "args": { + "External id": 500117,"Sequence number": 5516125, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14297 + } + }, + { + "ph": "s", "id": 109, "pid": 2338711, "tid": 2338711, "ts": 6274141489867.183, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489944.477, "dur": 2.176, + "args": { + "External id": 500118,"Sequence number": 5516126, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14298 + } + }, + { + "ph": "s", "id": 108, "pid": 2338711, "tid": 2338711, "ts": 6274141489944.477, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141489955.766, "dur": 88.097, + "args": { + "External id": 500119,"Sequence number": 5516127, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141489956.375, "dur": 24.366, + "args": { + "External id": 500120,"Sequence number": 5516127, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14300 + } + }, + { + "ph": "s", "id": 107, "pid": 2338711, "tid": 2338711, "ts": 6274141489956.375, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141489965.102, "dur": 14.560, + "args": { + "External id": 500121,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141489978.695, "dur": 0.821, + "args": { + "External id": 500122,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141489981.422, "dur": 62.151, + "args": { + "External id": 500123,"Sequence number": 5516128, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141489982.457, "dur": 6.324, + "args": { + "External id": 500124,"Sequence number": 5516128, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141489983.232, "dur": 5.376, + "args": { + "External id": 500125,"Sequence number": 5516128, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14305 + } + }, + { + "ph": "s", "id": 106, "pid": 2338711, "tid": 2338711, "ts": 6274141489983.232, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141489989.387, "dur": 46.486, + "args": { + "External id": 500126,"Sequence number": 5516129, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14306 + } + }, + { + "ph": "s", "id": 105, "pid": 2338711, "tid": 2338711, "ts": 6274141489989.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490037.614, "dur": 5.643, + "args": { + "External id": 500127,"Sequence number": 5516130, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14307 + } + }, + { + "ph": "s", "id": 104, "pid": 2338711, "tid": 2338711, "ts": 6274141490037.614, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141490105.157, "dur": 167.909, + "args": { + "External id": 500128,"Sequence number": 5516131, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14308 + } + }, + { + "ph": "s", "id": 103, "pid": 2338711, "tid": 2338711, "ts": 6274141490105.157, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141490144.974, "dur": 5.809, + "args": { + "External id": 500129,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141490190.716, "dur": 66.059, + "args": { + "External id": 500130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141490191.682, "dur": 6.561, + "args": { + "External id": 500131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141490193.539, "dur": 3.804, + "args": { + "External id": 500132,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141490195.454, "dur": 1.734, + "args": { + "External id": 500133,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141490198.913, "dur": 57.491, + "args": { + "External id": 500134,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490200.624, "dur": 2.020, + "args": { + "External id": 500135,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490201.404, "dur": 1.112, + "args": { + "External id": 500136,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141490203.490, "dur": 48.641, + "args": { + "External id": 500137,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490254.402, "dur": 1.332, + "args": { + "External id": 500138,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274141490283.371, "dur": 26.462, + "args": { + "External id": 500139,"Sequence number": 5516132, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14319 + } + }, + { + "ph": "s", "id": 102, "pid": 2338711, "tid": 2338711, "ts": 6274141490283.371, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141490348.958, "dur": 182.705, + "args": { + "External id": 500140,"Sequence number": 5516133, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14320 + } + }, + { + "ph": "s", "id": 101, "pid": 2338711, "tid": 2338711, "ts": 6274141490348.958, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490367.522, "dur": 2.770, + "args": { + "External id": 500141,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490368.341, "dur": 1.825, + "args": { + "External id": 500142,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141490378.572, "dur": 7.000, + "args": { + "External id": 500143,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141490381.121, "dur": 4.077, + "args": { + "External id": 500144,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141490392.138, "dur": 5.104, + "args": { + "External id": 500145,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490517.297, "dur": 3.131, + "args": { + "External id": 500146,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490518.419, "dur": 1.782, + "args": { + "External id": 500147,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141490548.257, "dur": 104.163, + "args": { + "External id": 500148,"Sequence number": 5516134, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141490549.279, "dur": 6.246, + "args": { + "External id": 500149,"Sequence number": 5516134, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14329 + } + }, + { + "ph": "s", "id": 100, "pid": 2338711, "tid": 2338711, "ts": 6274141490549.279, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141490551.240, "dur": 3.054, + "args": { + "External id": 500150,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141490553.125, "dur": 1.017, + "args": { + "External id": 500151,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141490558.979, "dur": 93.232, + "args": { + "External id": 500152,"Sequence number": 5516135, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490560.336, "dur": 3.568, + "args": { + "External id": 500153,"Sequence number": 5516135, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490561.153, "dur": 2.622, + "args": { + "External id": 500154,"Sequence number": 5516135, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14334 + } + }, + { + "ph": "s", "id": 99, "pid": 2338711, "tid": 2338711, "ts": 6274141490561.153, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141490564.548, "dur": 79.194, + "args": { + "External id": 500155,"Sequence number": 5516136, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14335 + } + }, + { + "ph": "s", "id": 98, "pid": 2338711, "tid": 2338711, "ts": 6274141490564.548, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490647.082, "dur": 4.447, + "args": { + "External id": 500156,"Sequence number": 5516137, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14336 + } + }, + { + "ph": "s", "id": 97, "pid": 2338711, "tid": 2338711, "ts": 6274141490647.082, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141490662.134, "dur": 64.742, + "args": { + "External id": 500157,"Sequence number": 5516138, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141490662.974, "dur": 7.082, + "args": { + "External id": 500158,"Sequence number": 5516138, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14338 + } + }, + { + "ph": "s", "id": 96, "pid": 2338711, "tid": 2338711, "ts": 6274141490662.974, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141490665.524, "dur": 3.138, + "args": { + "External id": 500159,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141490667.554, "dur": 0.973, + "args": { + "External id": 500160,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141490670.978, "dur": 55.633, + "args": { + "External id": 500161,"Sequence number": 5516139, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490674.614, "dur": 3.203, + "args": { + "External id": 500162,"Sequence number": 5516139, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490675.698, "dur": 1.998, + "args": { + "External id": 500163,"Sequence number": 5516139, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14343 + } + }, + { + "ph": "s", "id": 95, "pid": 2338711, "tid": 2338711, "ts": 6274141490675.698, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141490678.321, "dur": 44.086, + "args": { + "External id": 500164,"Sequence number": 5516140, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14344 + } + }, + { + "ph": "s", "id": 94, "pid": 2338711, "tid": 2338711, "ts": 6274141490678.321, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490724.161, "dur": 2.097, + "args": { + "External id": 500165,"Sequence number": 5516141, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14345 + } + }, + { + "ph": "s", "id": 93, "pid": 2338711, "tid": 2338711, "ts": 6274141490724.161, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141490733.282, "dur": 61.720, + "args": { + "External id": 500166,"Sequence number": 5516142, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141490736.132, "dur": 8.073, + "args": { + "External id": 500167,"Sequence number": 5516142, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14347 + } + }, + { + "ph": "s", "id": 92, "pid": 2338711, "tid": 2338711, "ts": 6274141490736.132, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141490738.267, "dur": 4.615, + "args": { + "External id": 500168,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141490739.887, "dur": 2.795, + "args": { + "External id": 500169,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141490744.707, "dur": 49.981, + "args": { + "External id": 500170,"Sequence number": 5516143, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490745.620, "dur": 6.414, + "args": { + "External id": 500171,"Sequence number": 5516143, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490748.736, "dur": 3.178, + "args": { + "External id": 500172,"Sequence number": 5516143, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14352 + } + }, + { + "ph": "s", "id": 91, "pid": 2338711, "tid": 2338711, "ts": 6274141490748.736, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141490752.528, "dur": 34.569, + "args": { + "External id": 500173,"Sequence number": 5516144, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14353 + } + }, + { + "ph": "s", "id": 90, "pid": 2338711, "tid": 2338711, "ts": 6274141490752.528, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490788.622, "dur": 5.767, + "args": { + "External id": 500174,"Sequence number": 5516145, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14354 + } + }, + { + "ph": "s", "id": 89, "pid": 2338711, "tid": 2338711, "ts": 6274141490788.622, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490811.904, "dur": 6.323, + "args": { + "External id": 500175,"Sequence number": 5516146, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490815.336, "dur": 2.716, + "args": { + "External id": 500176,"Sequence number": 5516146, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14356 + } + }, + { + "ph": "s", "id": 88, "pid": 2338711, "tid": 2338711, "ts": 6274141490815.336, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490824.993, "dur": 3.362, + "args": { + "External id": 500177,"Sequence number": 5516147, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490826.535, "dur": 1.711, + "args": { + "External id": 500178,"Sequence number": 5516147, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14358 + } + }, + { + "ph": "s", "id": 87, "pid": 2338711, "tid": 2338711, "ts": 6274141490826.535, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141490832.680, "dur": 4.976, + "args": { + "External id": 500179,"Sequence number": 5516148, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141490833.502, "dur": 4.050, + "args": { + "External id": 500180,"Sequence number": 5516148, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14360 + } + }, + { + "ph": "s", "id": 86, "pid": 2338711, "tid": 2338711, "ts": 6274141490833.502, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141490868.164, "dur": 145.204, + "args": { + "External id": 500181,"Sequence number": 5516149, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14361 + } + }, + { + "ph": "s", "id": 85, "pid": 2338711, "tid": 2338711, "ts": 6274141490868.164, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141490886.662, "dur": 10.539, + "args": { + "External id": 500182,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141490889.792, "dur": 6.908, + "args": { + "External id": 500183,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141491025.356, "dur": 150.574, + "args": { + "External id": 500184,"Sequence number": 5516150, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14364 + } + }, + { + "ph": "s", "id": 84, "pid": 2338711, "tid": 2338711, "ts": 6274141491025.356, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141491038.574, "dur": 6.463, + "args": { + "External id": 500185,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141491040.770, "dur": 3.908, + "args": { + "External id": 500186,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274141491208.135, "dur": 183.799, + "args": { + "External id": 500187,"Sequence number": 5516151, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14367 + } + }, + { + "ph": "s", "id": 83, "pid": 2338711, "tid": 2338711, "ts": 6274141491208.135, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141491234.874, "dur": 130.058, + "args": { + "External id": 500188,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141491282.035, "dur": 9.243, + "args": { + "External id": 500189,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141491285.207, "dur": 5.588, + "args": { + "External id": 500190,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141491294.423, "dur": 5.691, + "args": { + "External id": 500191,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141491301.568, "dur": 1.198, + "args": { + "External id": 500192,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141491307.800, "dur": 3.153, + "args": { + "External id": 500193,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274141491377.530, "dur": 4.899, + "args": { + "External id": 500194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491397.108, "dur": 5.969, + "args": { + "External id": 500195,"Sequence number": 5516152, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491398.612, "dur": 4.319, + "args": { + "External id": 500196,"Sequence number": 5516152, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14376 + } + }, + { + "ph": "s", "id": 82, "pid": 2338711, "tid": 2338711, "ts": 6274141491398.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141491414.839, "dur": 120.771, + "args": { + "External id": 500197,"Sequence number": 5516153, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141491416.167, "dur": 12.844, + "args": { + "External id": 500198,"Sequence number": 5516153, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14378 + } + }, + { + "ph": "s", "id": 81, "pid": 2338711, "tid": 2338711, "ts": 6274141491416.167, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141491420.959, "dur": 6.822, + "args": { + "External id": 500199,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141491425.786, "dur": 1.777, + "args": { + "External id": 500200,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141491430.338, "dur": 104.845, + "args": { + "External id": 500201,"Sequence number": 5516154, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491432.622, "dur": 4.573, + "args": { + "External id": 500202,"Sequence number": 5516154, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491433.654, "dur": 3.436, + "args": { + "External id": 500203,"Sequence number": 5516154, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14383 + } + }, + { + "ph": "s", "id": 80, "pid": 2338711, "tid": 2338711, "ts": 6274141491433.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141491440.307, "dur": 84.154, + "args": { + "External id": 500204,"Sequence number": 5516155, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14384 + } + }, + { + "ph": "s", "id": 79, "pid": 2338711, "tid": 2338711, "ts": 6274141491440.307, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491527.739, "dur": 6.495, + "args": { + "External id": 500205,"Sequence number": 5516156, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14385 + } + }, + { + "ph": "s", "id": 78, "pid": 2338711, "tid": 2338711, "ts": 6274141491527.739, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141491569.101, "dur": 252.453, + "args": { + "External id": 500206,"Sequence number": 5516157, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14386 + } + }, + { + "ph": "s", "id": 77, "pid": 2338711, "tid": 2338711, "ts": 6274141491569.101, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491605.690, "dur": 5.966, + "args": { + "External id": 500207,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491608.673, "dur": 2.622, + "args": { + "External id": 500208,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141491616.397, "dur": 3.401, + "args": { + "External id": 500209,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491617.744, "dur": 1.947, + "args": { + "External id": 500210,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491618.755, "dur": 0.840, + "args": { + "External id": 500211,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141491627.309, "dur": 18.486, + "args": { + "External id": 500212,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141491637.782, "dur": 7.676, + "args": { + "External id": 500213,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141491654.475, "dur": 2.857, + "args": { + "External id": 500214,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141491661.092, "dur": 4.975, + "args": { + "External id": 500215,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491800.556, "dur": 3.450, + "args": { + "External id": 500216,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491801.672, "dur": 2.004, + "args": { + "External id": 500217,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491806.545, "dur": 2.203, + "args": { + "External id": 500218,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491807.377, "dur": 1.280, + "args": { + "External id": 500219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141491840.250, "dur": 132.543, + "args": { + "External id": 500220,"Sequence number": 5516158, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141491841.593, "dur": 12.100, + "args": { + "External id": 500221,"Sequence number": 5516158, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14401 + } + }, + { + "ph": "s", "id": 76, "pid": 2338711, "tid": 2338711, "ts": 6274141491841.593, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141491846.292, "dur": 5.946, + "args": { + "External id": 500222,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141491848.762, "dur": 3.206, + "args": { + "External id": 500223,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141491854.473, "dur": 117.965, + "args": { + "External id": 500224,"Sequence number": 5516159, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491882.773, "dur": 5.606, + "args": { + "External id": 500225,"Sequence number": 5516159, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491885.806, "dur": 2.433, + "args": { + "External id": 500226,"Sequence number": 5516159, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14406 + } + }, + { + "ph": "s", "id": 75, "pid": 2338711, "tid": 2338711, "ts": 6274141491885.806, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141491889.235, "dur": 77.269, + "args": { + "External id": 500227,"Sequence number": 5516160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14407 + } + }, + { + "ph": "s", "id": 74, "pid": 2338711, "tid": 2338711, "ts": 6274141491889.235, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491969.020, "dur": 2.726, + "args": { + "External id": 500228,"Sequence number": 5516161, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14408 + } + }, + { + "ph": "s", "id": 73, "pid": 2338711, "tid": 2338711, "ts": 6274141491969.020, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141491981.374, "dur": 71.124, + "args": { + "External id": 500229,"Sequence number": 5516162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141491981.951, "dur": 9.060, + "args": { + "External id": 500230,"Sequence number": 5516162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14410 + } + }, + { + "ph": "s", "id": 72, "pid": 2338711, "tid": 2338711, "ts": 6274141491981.951, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141491985.851, "dur": 4.052, + "args": { + "External id": 500231,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141491986.971, "dur": 2.782, + "args": { + "External id": 500232,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141491991.588, "dur": 60.665, + "args": { + "External id": 500233,"Sequence number": 5516163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141491992.586, "dur": 7.404, + "args": { + "External id": 500234,"Sequence number": 5516163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141491993.173, "dur": 6.676, + "args": { + "External id": 500235,"Sequence number": 5516163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14415 + } + }, + { + "ph": "s", "id": 71, "pid": 2338711, "tid": 2338711, "ts": 6274141491993.173, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141492000.610, "dur": 44.028, + "args": { + "External id": 500236,"Sequence number": 5516164, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14416 + } + }, + { + "ph": "s", "id": 70, "pid": 2338711, "tid": 2338711, "ts": 6274141492000.610, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492046.315, "dur": 5.273, + "args": { + "External id": 500237,"Sequence number": 5516165, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14417 + } + }, + { + "ph": "s", "id": 69, "pid": 2338711, "tid": 2338711, "ts": 6274141492046.315, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141492115.329, "dur": 164.790, + "args": { + "External id": 500238,"Sequence number": 5516166, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14418 + } + }, + { + "ph": "s", "id": 68, "pid": 2338711, "tid": 2338711, "ts": 6274141492115.329, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141492159.005, "dur": 5.390, + "args": { + "External id": 500239,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141492197.802, "dur": 66.991, + "args": { + "External id": 500240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141492198.656, "dur": 6.032, + "args": { + "External id": 500241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141492200.569, "dur": 3.062, + "args": { + "External id": 500242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141492202.480, "dur": 0.906, + "args": { + "External id": 500243,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141492205.338, "dur": 59.141, + "args": { + "External id": 500244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492207.175, "dur": 4.685, + "args": { + "External id": 500245,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492210.070, "dur": 1.658, + "args": { + "External id": 500246,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141492212.487, "dur": 46.199, + "args": { + "External id": 500247,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492260.816, "dur": 3.079, + "args": { + "External id": 500248,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274141492289.495, "dur": 24.369, + "args": { + "External id": 500249,"Sequence number": 5516167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14429 + } + }, + { + "ph": "s", "id": 67, "pid": 2338711, "tid": 2338711, "ts": 6274141492289.495, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141492348.895, "dur": 191.568, + "args": { + "External id": 500250,"Sequence number": 5516168, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14430 + } + }, + { + "ph": "s", "id": 66, "pid": 2338711, "tid": 2338711, "ts": 6274141492348.895, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492370.439, "dur": 2.855, + "args": { + "External id": 500251,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492371.302, "dur": 1.825, + "args": { + "External id": 500252,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141492381.704, "dur": 9.061, + "args": { + "External id": 500253,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141492386.451, "dur": 3.943, + "args": { + "External id": 500254,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141492397.311, "dur": 3.480, + "args": { + "External id": 500255,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492526.687, "dur": 2.817, + "args": { + "External id": 500256,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492527.772, "dur": 1.492, + "args": { + "External id": 500257,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141492557.939, "dur": 119.441, + "args": { + "External id": 500258,"Sequence number": 5516169, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141492559.076, "dur": 9.995, + "args": { + "External id": 500259,"Sequence number": 5516169, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14439 + } + }, + { + "ph": "s", "id": 65, "pid": 2338711, "tid": 2338711, "ts": 6274141492559.076, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141492561.958, "dur": 5.627, + "args": { + "External id": 500260,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141492565.922, "dur": 1.497, + "args": { + "External id": 500261,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141492570.202, "dur": 106.873, + "args": { + "External id": 500262,"Sequence number": 5516170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492571.538, "dur": 3.884, + "args": { + "External id": 500263,"Sequence number": 5516170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492572.629, "dur": 2.591, + "args": { + "External id": 500264,"Sequence number": 5516170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14444 + } + }, + { + "ph": "s", "id": 64, "pid": 2338711, "tid": 2338711, "ts": 6274141492572.629, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141492582.382, "dur": 87.144, + "args": { + "External id": 500265,"Sequence number": 5516171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14445 + } + }, + { + "ph": "s", "id": 63, "pid": 2338711, "tid": 2338711, "ts": 6274141492582.382, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492673.194, "dur": 3.081, + "args": { + "External id": 500266,"Sequence number": 5516172, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14446 + } + }, + { + "ph": "s", "id": 62, "pid": 2338711, "tid": 2338711, "ts": 6274141492673.194, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141492687.685, "dur": 64.342, + "args": { + "External id": 500267,"Sequence number": 5516173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141492688.460, "dur": 8.646, + "args": { + "External id": 500268,"Sequence number": 5516173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14448 + } + }, + { + "ph": "s", "id": 61, "pid": 2338711, "tid": 2338711, "ts": 6274141492688.460, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141492690.656, "dur": 5.011, + "args": { + "External id": 500269,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141492694.615, "dur": 0.890, + "args": { + "External id": 500270,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141492697.810, "dur": 53.839, + "args": { + "External id": 500271,"Sequence number": 5516174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492699.342, "dur": 3.757, + "args": { + "External id": 500272,"Sequence number": 5516174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492700.294, "dur": 2.684, + "args": { + "External id": 500273,"Sequence number": 5516174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14453 + } + }, + { + "ph": "s", "id": 60, "pid": 2338711, "tid": 2338711, "ts": 6274141492700.294, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141492704.034, "dur": 42.413, + "args": { + "External id": 500274,"Sequence number": 5516175, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14454 + } + }, + { + "ph": "s", "id": 59, "pid": 2338711, "tid": 2338711, "ts": 6274141492704.034, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492748.101, "dur": 2.949, + "args": { + "External id": 500275,"Sequence number": 5516176, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14455 + } + }, + { + "ph": "s", "id": 58, "pid": 2338711, "tid": 2338711, "ts": 6274141492748.101, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141492758.838, "dur": 59.935, + "args": { + "External id": 500276,"Sequence number": 5516177, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141492759.409, "dur": 7.460, + "args": { + "External id": 500277,"Sequence number": 5516177, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14457 + } + }, + { + "ph": "s", "id": 57, "pid": 2338711, "tid": 2338711, "ts": 6274141492759.409, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141492761.329, "dur": 4.515, + "args": { + "External id": 500278,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141492762.983, "dur": 2.683, + "args": { + "External id": 500279,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141492767.454, "dur": 51.055, + "args": { + "External id": 500280,"Sequence number": 5516178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492770.669, "dur": 4.260, + "args": { + "External id": 500281,"Sequence number": 5516178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492771.327, "dur": 3.469, + "args": { + "External id": 500282,"Sequence number": 5516178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14462 + } + }, + { + "ph": "s", "id": 56, "pid": 2338711, "tid": 2338711, "ts": 6274141492771.327, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141492775.571, "dur": 37.183, + "args": { + "External id": 500283,"Sequence number": 5516179, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14463 + } + }, + { + "ph": "s", "id": 55, "pid": 2338711, "tid": 2338711, "ts": 6274141492775.571, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492814.286, "dur": 3.839, + "args": { + "External id": 500284,"Sequence number": 5516180, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14464 + } + }, + { + "ph": "s", "id": 54, "pid": 2338711, "tid": 2338711, "ts": 6274141492814.286, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492837.669, "dur": 3.608, + "args": { + "External id": 500285,"Sequence number": 5516181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492838.598, "dur": 2.573, + "args": { + "External id": 500286,"Sequence number": 5516181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14466 + } + }, + { + "ph": "s", "id": 53, "pid": 2338711, "tid": 2338711, "ts": 6274141492838.598, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492847.975, "dur": 4.783, + "args": { + "External id": 500287,"Sequence number": 5516182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492848.850, "dur": 3.773, + "args": { + "External id": 500288,"Sequence number": 5516182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14468 + } + }, + { + "ph": "s", "id": 52, "pid": 2338711, "tid": 2338711, "ts": 6274141492848.850, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141492856.614, "dur": 4.378, + "args": { + "External id": 500289,"Sequence number": 5516183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141492859.185, "dur": 1.696, + "args": { + "External id": 500290,"Sequence number": 5516183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14470 + } + }, + { + "ph": "s", "id": 51, "pid": 2338711, "tid": 2338711, "ts": 6274141492859.185, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141492890.258, "dur": 153.285, + "args": { + "External id": 500291,"Sequence number": 5516184, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14471 + } + }, + { + "ph": "s", "id": 50, "pid": 2338711, "tid": 2338711, "ts": 6274141492890.258, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141492909.969, "dur": 9.584, + "args": { + "External id": 500292,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141492912.645, "dur": 6.531, + "args": { + "External id": 500293,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141493090.887, "dur": 125.376, + "args": { + "External id": 500294,"Sequence number": 5516185, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14474 + } + }, + { + "ph": "s", "id": 49, "pid": 2338711, "tid": 2338711, "ts": 6274141493090.887, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141493108.676, "dur": 9.260, + "args": { + "External id": 500295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141493111.634, "dur": 5.646, + "args": { + "External id": 500296,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274141493247.568, "dur": 180.342, + "args": { + "External id": 500297,"Sequence number": 5516186, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14477 + } + }, + { + "ph": "s", "id": 48, "pid": 2338711, "tid": 2338711, "ts": 6274141493247.568, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141493273.859, "dur": 128.176, + "args": { + "External id": 500298,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141493324.235, "dur": 6.834, + "args": { + "External id": 500299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141493326.738, "dur": 3.979, + "args": { + "External id": 500300,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141493333.830, "dur": 4.215, + "args": { + "External id": 500301,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141493339.446, "dur": 1.066, + "args": { + "External id": 500302,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141493343.003, "dur": 3.512, + "args": { + "External id": 500303,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274141493414.614, "dur": 4.644, + "args": { + "External id": 500304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493433.135, "dur": 6.068, + "args": { + "External id": 500305,"Sequence number": 5516187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493434.613, "dur": 4.436, + "args": { + "External id": 500306,"Sequence number": 5516187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14486 + } + }, + { + "ph": "s", "id": 47, "pid": 2338711, "tid": 2338711, "ts": 6274141493434.613, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141493450.972, "dur": 109.815, + "args": { + "External id": 500307,"Sequence number": 5516188, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141493452.210, "dur": 11.382, + "args": { + "External id": 500308,"Sequence number": 5516188, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14488 + } + }, + { + "ph": "s", "id": 46, "pid": 2338711, "tid": 2338711, "ts": 6274141493452.210, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141493458.036, "dur": 4.501, + "args": { + "External id": 500309,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141493460.515, "dur": 1.842, + "args": { + "External id": 500310,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141493465.048, "dur": 95.418, + "args": { + "External id": 500311,"Sequence number": 5516189, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493467.603, "dur": 4.333, + "args": { + "External id": 500312,"Sequence number": 5516189, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493468.076, "dur": 3.740, + "args": { + "External id": 500313,"Sequence number": 5516189, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14493 + } + }, + { + "ph": "s", "id": 45, "pid": 2338711, "tid": 2338711, "ts": 6274141493468.076, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141493473.022, "dur": 79.482, + "args": { + "External id": 500314,"Sequence number": 5516190, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14494 + } + }, + { + "ph": "s", "id": 44, "pid": 2338711, "tid": 2338711, "ts": 6274141493473.022, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493555.213, "dur": 4.630, + "args": { + "External id": 500315,"Sequence number": 5516191, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14495 + } + }, + { + "ph": "s", "id": 43, "pid": 2338711, "tid": 2338711, "ts": 6274141493555.213, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141493611.465, "dur": 219.061, + "args": { + "External id": 500316,"Sequence number": 5516192, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14496 + } + }, + { + "ph": "s", "id": 42, "pid": 2338711, "tid": 2338711, "ts": 6274141493611.465, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493632.657, "dur": 8.011, + "args": { + "External id": 500317,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493635.913, "dur": 4.495, + "args": { + "External id": 500318,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274141493645.001, "dur": 3.449, + "args": { + "External id": 500319,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493646.265, "dur": 2.081, + "args": { + "External id": 500320,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493647.305, "dur": 0.959, + "args": { + "External id": 500321,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141493655.599, "dur": 7.953, + "args": { + "External id": 500322,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141493657.858, "dur": 5.376, + "args": { + "External id": 500323,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141493671.268, "dur": 2.957, + "args": { + "External id": 500324,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141493678.076, "dur": 3.510, + "args": { + "External id": 500325,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493806.632, "dur": 3.482, + "args": { + "External id": 500326,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493808.235, "dur": 1.660, + "args": { + "External id": 500327,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493813.014, "dur": 2.505, + "args": { + "External id": 500328,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493814.549, "dur": 0.884, + "args": { + "External id": 500329,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141493848.487, "dur": 96.499, + "args": { + "External id": 500330,"Sequence number": 5516193, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141493849.666, "dur": 7.883, + "args": { + "External id": 500331,"Sequence number": 5516193, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14511 + } + }, + { + "ph": "s", "id": 41, "pid": 2338711, "tid": 2338711, "ts": 6274141493849.666, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141493852.154, "dur": 4.132, + "args": { + "External id": 500332,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141493854.453, "dur": 1.527, + "args": { + "External id": 500333,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141493858.324, "dur": 86.425, + "args": { + "External id": 500334,"Sequence number": 5516194, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493859.844, "dur": 5.452, + "args": { + "External id": 500335,"Sequence number": 5516194, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493863.181, "dur": 1.997, + "args": { + "External id": 500336,"Sequence number": 5516194, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14516 + } + }, + { + "ph": "s", "id": 40, "pid": 2338711, "tid": 2338711, "ts": 6274141493863.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141493866.392, "dur": 72.595, + "args": { + "External id": 500337,"Sequence number": 5516195, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14517 + } + }, + { + "ph": "s", "id": 39, "pid": 2338711, "tid": 2338711, "ts": 6274141493866.392, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493941.251, "dur": 2.944, + "args": { + "External id": 500338,"Sequence number": 5516196, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14518 + } + }, + { + "ph": "s", "id": 38, "pid": 2338711, "tid": 2338711, "ts": 6274141493941.251, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141493952.442, "dur": 76.770, + "args": { + "External id": 500339,"Sequence number": 5516197, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141493953.245, "dur": 13.279, + "args": { + "External id": 500340,"Sequence number": 5516197, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14520 + } + }, + { + "ph": "s", "id": 37, "pid": 2338711, "tid": 2338711, "ts": 6274141493953.245, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141493959.612, "dur": 5.534, + "args": { + "External id": 500341,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141493961.555, "dur": 3.362, + "args": { + "External id": 500342,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141493967.247, "dur": 61.738, + "args": { + "External id": 500343,"Sequence number": 5516198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141493968.776, "dur": 6.783, + "args": { + "External id": 500344,"Sequence number": 5516198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141493972.066, "dur": 3.367, + "args": { + "External id": 500345,"Sequence number": 5516198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14525 + } + }, + { + "ph": "s", "id": 36, "pid": 2338711, "tid": 2338711, "ts": 6274141493972.066, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141493976.241, "dur": 46.400, + "args": { + "External id": 500346,"Sequence number": 5516199, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14526 + } + }, + { + "ph": "s", "id": 35, "pid": 2338711, "tid": 2338711, "ts": 6274141493976.241, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141494024.379, "dur": 4.174, + "args": { + "External id": 500347,"Sequence number": 5516200, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14527 + } + }, + { + "ph": "s", "id": 34, "pid": 2338711, "tid": 2338711, "ts": 6274141494024.379, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141494048.762, "dur": 203.572, + "args": { + "External id": 500348,"Sequence number": 5516201, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14528 + } + }, + { + "ph": "s", "id": 33, "pid": 2338711, "tid": 2338711, "ts": 6274141494048.762, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141494127.597, "dur": 6.336, + "args": { + "External id": 500349,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141494169.895, "dur": 67.672, + "args": { + "External id": 500350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141494170.909, "dur": 6.111, + "args": { + "External id": 500351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141494172.604, "dur": 3.446, + "args": { + "External id": 500352,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494174.829, "dur": 1.064, + "args": { + "External id": 500353,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141494177.904, "dur": 59.233, + "args": { + "External id": 500354,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274141494179.786, "dur": 4.891, + "args": { + "External id": 500355,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141494183.075, "dur": 1.502, + "args": { + "External id": 500356,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141494185.421, "dur": 47.796, + "args": { + "External id": 500357,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274141494235.323, "dur": 1.158, + "args": { + "External id": 500358,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274141494262.189, "dur": 25.566, + "args": { + "External id": 500359,"Sequence number": 5516202, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14539 + } + }, + { + "ph": "s", "id": 32, "pid": 2338711, "tid": 2338711, "ts": 6274141494262.189, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338711, "tid": 2338711, + "ts": 6274141494307.010, "dur": 42.147, + "args": { + "External id": 500360,"Sequence number": 5516203, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14540 + } + }, + { + "ph": "s", "id": 31, "pid": 2338711, "tid": 2338711, "ts": 6274141494307.010, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338711, "tid": 2338711, + "ts": 6274141494316.451, "dur": 27.622, + "args": { + "External id": 500361,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141494346.106, "dur": 1.267, + "args": { + "External id": 500362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 14542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274141494384.367, "dur": 45.818, + "args": { + "External id": 500363,"Record function id": 0, "Ev Idx": 14543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338711, "tid": 2338711, + "ts": 6274141494431.598, "dur": 214.908, + "args": { + "External id": 500364,"Record function id": 0, "Ev Idx": 14544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141494468.584, "dur": 168.272, + "args": { + "External id": 500365,"Sequence number": 5516204, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 14545 + } + }, + { + "ph": "s", "id": 30, "pid": 2338711, "tid": 2338711, "ts": 6274141494468.584, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274141494536.180, "dur": 39.052, + "args": { + "External id": 500366,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 14546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274141494733.382, "dur": 42.209, + "args": { + "External id": 500367,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141494736.658, "dur": 6.003, + "args": { + "External id": 500368,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141494745.218, "dur": 30.062, + "args": { + "External id": 500369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141494747.772, "dur": 26.939, + "args": { + "External id": 500370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274141494780.934, "dur": 20.051, + "args": { + "External id": 500371,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141494782.085, "dur": 3.109, + "args": { + "External id": 500372,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141494787.781, "dur": 12.966, + "args": { + "External id": 500373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141494788.520, "dur": 11.857, + "args": { + "External id": 500374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274141494804.150, "dur": 18.592, + "args": { + "External id": 500375,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141494805.014, "dur": 4.681, + "args": { + "External id": 500376,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141494810.355, "dur": 12.145, + "args": { + "External id": 500377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141494810.911, "dur": 11.289, + "args": { + "External id": 500378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141494831.061, "dur": 0.710, + "args": { + "External id": 500379,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 14559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338711, "tid": 2338711, + "ts": 6274141494839.107, "dur": 11.021, + "args": { + "External id": 500380,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494846.403, "dur": 2.225, + "args": { + "External id": 500381,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494856.672, "dur": 6.827, + "args": { + "External id": 500382,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494860.834, "dur": 0.757, + "args": { + "External id": 500383,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494864.901, "dur": 3.301, + "args": { + "External id": 500384,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494867.056, "dur": 0.433, + "args": { + "External id": 500385,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494869.358, "dur": 3.347, + "args": { + "External id": 500386,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494871.535, "dur": 0.630, + "args": { + "External id": 500387,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494875.995, "dur": 5.554, + "args": { + "External id": 500388,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494878.204, "dur": 2.764, + "args": { + "External id": 500389,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494882.598, "dur": 3.264, + "args": { + "External id": 500390,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494884.301, "dur": 0.686, + "args": { + "External id": 500391,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494886.815, "dur": 2.872, + "args": { + "External id": 500392,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 14572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494888.802, "dur": 0.384, + "args": { + "External id": 500393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141494893.463, "dur": 4.865, + "args": { + "External id": 500394,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 14574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494897.198, "dur": 0.393, + "args": { + "External id": 500395,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494902.718, "dur": 3.110, + "args": { + "External id": 500396,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 14576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494904.734, "dur": 0.531, + "args": { + "External id": 500397,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141494910.779, "dur": 6.346, + "args": { + "External id": 500398,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 14578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494915.469, "dur": 0.579, + "args": { + "External id": 500399,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494918.162, "dur": 2.613, + "args": { + "External id": 500400,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 14580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494919.787, "dur": 0.395, + "args": { + "External id": 500401,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 14581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494923.221, "dur": 8.134, + "args": { + "External id": 500402,"Sequence number": 5516205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14582 + } + }, + { + "ph": "s", "id": 29, "pid": 2338711, "tid": 2338711, "ts": 6274141494923.221, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494929.027, "dur": 0.730, + "args": { + "External id": 500403,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494932.241, "dur": 6.192, + "args": { + "External id": 500404,"Sequence number": 5516206, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14584 + } + }, + { + "ph": "s", "id": 28, "pid": 2338711, "tid": 2338711, "ts": 6274141494932.241, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494935.124, "dur": 2.578, + "args": { + "External id": 500405,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141494939.307, "dur": 7.521, + "args": { + "External id": 500406,"Sequence number": 5516207, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 14586 + } + }, + { + "ph": "s", "id": 27, "pid": 2338711, "tid": 2338711, "ts": 6274141494939.307, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494945.130, "dur": 0.651, + "args": { + "External id": 500407,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141494947.923, "dur": 5.002, + "args": { + "External id": 500408,"Sequence number": 5516208, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 14588 + } + }, + { + "ph": "s", "id": 26, "pid": 2338711, "tid": 2338711, "ts": 6274141494947.923, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141494951.594, "dur": 0.491, + "args": { + "External id": 500409,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 14589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141494956.687, "dur": 42.096, + "args": { + "External id": 500410,"Sequence number": 5516209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141494958.638, "dur": 39.892, + "args": { + "External id": 500411,"Sequence number": 5516209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141494961.740, "dur": 6.525, + "args": { + "External id": 500412,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 14592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141494963.899, "dur": 3.822, + "args": { + "External id": 500413,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141494969.703, "dur": 28.318, + "args": { + "External id": 500414,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 14594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141495022.489, "dur": 5.865, + "args": { + "External id": 500415,"Sequence number": 5516209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14595 + } + }, + { + "ph": "s", "id": 25, "pid": 2338711, "tid": 2338711, "ts": 6274141495022.489, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141495032.341, "dur": 1.383, + "args": { + "External id": 500416,"Sequence number": 5516210, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141495099.678, "dur": 144950.778, + "args": { + "External id": 500417,"Sequence number": 5516210, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 14597 + } + }, + { + "ph": "s", "id": 24, "pid": 2338711, "tid": 2338711, "ts": 6274141495099.678, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141495117.396, "dur": 35.292, + "args": { + "External id": 500418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141495118.437, "dur": 34.074, + "args": { + "External id": 500419,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141495119.951, "dur": 6.836, + "args": { + "External id": 500420,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141495121.715, "dur": 4.570, + "args": { + "External id": 500421,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495127.814, "dur": 24.223, + "args": { + "External id": 500422,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 14602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141495170.371, "dur": 27.284, + "args": { + "External id": 500423,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141495171.311, "dur": 6.994, + "args": { + "External id": 500424,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141495174.511, "dur": 3.456, + "args": { + "External id": 500425,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495179.227, "dur": 18.224, + "args": { + "External id": 500426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495182.471, "dur": 14.590, + "args": { + "External id": 500427,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141495201.088, "dur": 20.290, + "args": { + "External id": 500428,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141495202.144, "dur": 6.232, + "args": { + "External id": 500429,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141495203.717, "dur": 4.386, + "args": { + "External id": 500430,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495208.997, "dur": 12.200, + "args": { + "External id": 500431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495209.548, "dur": 11.249, + "args": { + "External id": 500432,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 14612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274141495226.179, "dur": 21.848, + "args": { + "External id": 500433,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141495229.684, "dur": 2.892, + "args": { + "External id": 500434,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495233.080, "dur": 14.701, + "args": { + "External id": 500435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 14615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495236.274, "dur": 11.213, + "args": { + "External id": 500436,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274141495254.655, "dur": 29.204, + "args": { + "External id": 500437,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141495288.447, "dur": 64.218, + "args": { + "External id": 500438,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141495292.520, "dur": 59.674, + "args": { + "External id": 500439,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141495298.227, "dur": 1.038, + "args": { + "External id": 500440,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141495300.998, "dur": 30.113, + "args": { + "External id": 500441,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141495302.832, "dur": 28.079, + "args": { + "External id": 500442,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 14622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141495310.951, "dur": 2.500, + "args": { + "External id": 500443,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141495314.343, "dur": 16.272, + "args": { + "External id": 500444,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 14624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274141495360.005, "dur": 138724.553, + "args": { + "External id": 500445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274141495361.767, "dur": 138721.197, + "args": { + "External id": 500446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141634099.528, "dur": 8.948, + "args": { + "External id": 500447,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634105.003, "dur": 1.184, + "args": { + "External id": 500448,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141634114.134, "dur": 120.188, + "args": { + "External id": 500449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141634115.864, "dur": 6.759, + "args": { + "External id": 500450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141634118.497, "dur": 3.109, + "args": { + "External id": 500451,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634120.764, "dur": 0.570, + "args": { + "External id": 500452,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141634124.108, "dur": 109.462, + "args": { + "External id": 500453,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141634131.490, "dur": 101.164, + "args": { + "External id": 500454,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141634238.171, "dur": 5.093, + "args": { + "External id": 500455,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634240.769, "dur": 0.674, + "args": { + "External id": 500456,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141634253.358, "dur": 4.007, + "args": { + "External id": 500457,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141634270.045, "dur": 9.200, + "args": { + "External id": 500458,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141634272.499, "dur": 6.432, + "args": { + "External id": 500459,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141634432.356, "dur": 253.772, + "args": { + "External id": 500460,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141634437.501, "dur": 2.196, + "args": { + "External id": 500461,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141634442.906, "dur": 242.672, + "args": { + "External id": 500462,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141634445.796, "dur": 0.983, + "args": { + "External id": 500463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141634450.366, "dur": 28.314, + "args": { + "External id": 500464,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141634480.838, "dur": 5.982, + "args": { + "External id": 500465,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634485.563, "dur": 0.828, + "args": { + "External id": 500466,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141634489.000, "dur": 28.539, + "args": { + "External id": 500467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141634490.580, "dur": 1.189, + "args": { + "External id": 500468,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141634496.422, "dur": 20.866, + "args": { + "External id": 500469,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634501.220, "dur": 3.435, + "args": { + "External id": 500470,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141634524.324, "dur": 25.630, + "args": { + "External id": 500471,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634552.916, "dur": 18.136, + "args": { + "External id": 500472,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141634574.997, "dur": 31.335, + "args": { + "External id": 500473,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634609.663, "dur": 16.503, + "args": { + "External id": 500474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141634628.828, "dur": 25.401, + "args": { + "External id": 500475,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634631.347, "dur": 2.017, + "args": { + "External id": 500476,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634637.669, "dur": 0.958, + "args": { + "External id": 500477,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634656.692, "dur": 13.116, + "args": { + "External id": 500478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634671.718, "dur": 12.308, + "args": { + "External id": 500479,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141634695.815, "dur": 2.605, + "args": { + "External id": 500480,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141634706.422, "dur": 4.373, + "args": { + "External id": 500481,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634709.211, "dur": 0.625, + "args": { + "External id": 500482,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141634796.129, "dur": 75.745, + "args": { + "External id": 500483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141634877.856, "dur": 9.735, + "args": { + "External id": 500484,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634883.398, "dur": 2.954, + "args": { + "External id": 500485,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634889.281, "dur": 30.322, + "args": { + "External id": 500486,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141634926.029, "dur": 6.548, + "args": { + "External id": 500487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141634928.090, "dur": 3.786, + "args": { + "External id": 500488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141634930.395, "dur": 1.251, + "args": { + "External id": 500489,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141634936.807, "dur": 50.969, + "args": { + "External id": 500490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141634940.392, "dur": 46.752, + "args": { + "External id": 500491,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141634992.959, "dur": 16.870, + "args": { + "External id": 500492,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141635016.259, "dur": 3.970, + "args": { + "External id": 500493,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635018.845, "dur": 0.469, + "args": { + "External id": 500494,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141635024.865, "dur": 85.240, + "args": { + "External id": 500495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141635026.133, "dur": 5.849, + "args": { + "External id": 500496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141635027.196, "dur": 4.163, + "args": { + "External id": 500497,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635030.405, "dur": 0.799, + "args": { + "External id": 500498,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141635032.478, "dur": 77.071, + "args": { + "External id": 500499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141635033.185, "dur": 75.493, + "args": { + "External id": 500500,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141635116.945, "dur": 7.246, + "args": { + "External id": 500501,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635120.040, "dur": 2.383, + "args": { + "External id": 500502,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141635132.058, "dur": 1.936, + "args": { + "External id": 500503,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635145.609, "dur": 7.585, + "args": { + "External id": 500504,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635148.160, "dur": 4.766, + "args": { + "External id": 500505,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141635256.975, "dur": 191.930, + "args": { + "External id": 500506,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635259.081, "dur": 4.158, + "args": { + "External id": 500507,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141635265.273, "dur": 183.138, + "args": { + "External id": 500508,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141635267.075, "dur": 0.329, + "args": { + "External id": 500509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141635268.619, "dur": 25.022, + "args": { + "External id": 500510,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141635298.364, "dur": 3.079, + "args": { + "External id": 500511,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635300.546, "dur": 0.667, + "args": { + "External id": 500512,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141635302.609, "dur": 23.831, + "args": { + "External id": 500513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635304.231, "dur": 1.385, + "args": { + "External id": 500514,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141635307.008, "dur": 19.104, + "args": { + "External id": 500515,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635312.232, "dur": 2.514, + "args": { + "External id": 500516,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141635328.075, "dur": 20.945, + "args": { + "External id": 500517,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635351.094, "dur": 14.035, + "args": { + "External id": 500518,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141635367.744, "dur": 12.710, + "args": { + "External id": 500519,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635382.058, "dur": 12.969, + "args": { + "External id": 500520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141635396.746, "dur": 22.956, + "args": { + "External id": 500521,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635401.289, "dur": 1.980, + "args": { + "External id": 500522,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635405.177, "dur": 0.426, + "args": { + "External id": 500523,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635421.354, "dur": 13.060, + "args": { + "External id": 500524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635435.394, "dur": 11.920, + "args": { + "External id": 500525,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141635455.954, "dur": 1.891, + "args": { + "External id": 500526,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141635466.949, "dur": 3.669, + "args": { + "External id": 500527,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635469.382, "dur": 0.375, + "args": { + "External id": 500528,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141635543.639, "dur": 75.070, + "args": { + "External id": 500529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141635626.072, "dur": 6.537, + "args": { + "External id": 500530,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635629.927, "dur": 1.075, + "args": { + "External id": 500531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635634.038, "dur": 26.180, + "args": { + "External id": 500532,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141635665.476, "dur": 8.282, + "args": { + "External id": 500533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141635667.268, "dur": 5.735, + "args": { + "External id": 500534,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635671.989, "dur": 0.806, + "args": { + "External id": 500535,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141635676.956, "dur": 47.607, + "args": { + "External id": 500536,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141635678.126, "dur": 45.811, + "args": { + "External id": 500537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635729.018, "dur": 17.006, + "args": { + "External id": 500538,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141635751.916, "dur": 4.094, + "args": { + "External id": 500539,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635754.475, "dur": 0.554, + "args": { + "External id": 500540,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141635760.152, "dur": 50.407, + "args": { + "External id": 500541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141635763.552, "dur": 5.971, + "args": { + "External id": 500542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141635764.409, "dur": 4.567, + "args": { + "External id": 500543,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635768.221, "dur": 0.622, + "args": { + "External id": 500544,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141635770.209, "dur": 39.896, + "args": { + "External id": 500545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141635770.646, "dur": 38.919, + "args": { + "External id": 500546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141635814.572, "dur": 6.183, + "args": { + "External id": 500547,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635817.043, "dur": 2.468, + "args": { + "External id": 500548,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141635829.928, "dur": 2.229, + "args": { + "External id": 500549,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635840.285, "dur": 5.972, + "args": { + "External id": 500550,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635842.422, "dur": 3.524, + "args": { + "External id": 500551,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141635938.561, "dur": 226.022, + "args": { + "External id": 500552,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635941.017, "dur": 1.543, + "args": { + "External id": 500553,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141635944.365, "dur": 219.664, + "args": { + "External id": 500554,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141635950.399, "dur": 0.451, + "args": { + "External id": 500555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141635952.075, "dur": 22.028, + "args": { + "External id": 500556,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141635975.905, "dur": 3.493, + "args": { + "External id": 500557,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141635978.136, "dur": 1.032, + "args": { + "External id": 500558,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141635980.600, "dur": 22.748, + "args": { + "External id": 500559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141635983.972, "dur": 1.034, + "args": { + "External id": 500560,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141635986.191, "dur": 16.815, + "args": { + "External id": 500561,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141635989.049, "dur": 2.872, + "args": { + "External id": 500562,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141636004.704, "dur": 20.406, + "args": { + "External id": 500563,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636026.577, "dur": 13.420, + "args": { + "External id": 500564,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141636045.089, "dur": 48.375, + "args": { + "External id": 500565,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636096.269, "dur": 13.672, + "args": { + "External id": 500566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141636112.119, "dur": 23.625, + "args": { + "External id": 500567,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636114.421, "dur": 1.994, + "args": { + "External id": 500568,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636118.431, "dur": 0.650, + "args": { + "External id": 500569,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636137.468, "dur": 11.460, + "args": { + "External id": 500570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636152.479, "dur": 10.249, + "args": { + "External id": 500571,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141636173.386, "dur": 2.572, + "args": { + "External id": 500572,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141636186.635, "dur": 4.517, + "args": { + "External id": 500573,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636189.746, "dur": 0.540, + "args": { + "External id": 500574,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141636265.199, "dur": 84.629, + "args": { + "External id": 500575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141636355.130, "dur": 4.959, + "args": { + "External id": 500576,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636358.473, "dur": 0.651, + "args": { + "External id": 500577,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636361.623, "dur": 32.097, + "args": { + "External id": 500578,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141636400.986, "dur": 8.318, + "args": { + "External id": 500579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141636402.856, "dur": 5.297, + "args": { + "External id": 500580,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636405.368, "dur": 2.527, + "args": { + "External id": 500581,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141636412.188, "dur": 46.350, + "args": { + "External id": 500582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141636413.702, "dur": 44.227, + "args": { + "External id": 500583,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636462.495, "dur": 14.148, + "args": { + "External id": 500584,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141636482.355, "dur": 6.231, + "args": { + "External id": 500585,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636487.000, "dur": 0.772, + "args": { + "External id": 500586,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141636492.869, "dur": 49.336, + "args": { + "External id": 500587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141636494.313, "dur": 4.128, + "args": { + "External id": 500588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141636495.237, "dur": 2.639, + "args": { + "External id": 500589,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636496.766, "dur": 1.000, + "args": { + "External id": 500590,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141636499.182, "dur": 42.626, + "args": { + "External id": 500591,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141636502.337, "dur": 38.897, + "args": { + "External id": 500592,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141636546.217, "dur": 3.620, + "args": { + "External id": 500593,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636548.113, "dur": 0.619, + "args": { + "External id": 500594,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141636555.317, "dur": 1.679, + "args": { + "External id": 500595,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141636565.203, "dur": 8.743, + "args": { + "External id": 500596,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141636567.502, "dur": 6.160, + "args": { + "External id": 500597,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141636684.570, "dur": 187.005, + "args": { + "External id": 500598,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141636687.502, "dur": 2.705, + "args": { + "External id": 500599,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141636693.815, "dur": 177.222, + "args": { + "External id": 500600,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141636695.546, "dur": 0.367, + "args": { + "External id": 500601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141636697.323, "dur": 22.190, + "args": { + "External id": 500602,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141636721.098, "dur": 6.403, + "args": { + "External id": 500603,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636723.910, "dur": 3.229, + "args": { + "External id": 500604,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141636728.458, "dur": 22.837, + "args": { + "External id": 500605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141636729.604, "dur": 1.220, + "args": { + "External id": 500606,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141636734.775, "dur": 16.218, + "args": { + "External id": 500607,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636737.798, "dur": 2.144, + "args": { + "External id": 500608,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141636752.469, "dur": 19.981, + "args": { + "External id": 500609,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636773.971, "dur": 13.195, + "args": { + "External id": 500610,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141636790.060, "dur": 12.509, + "args": { + "External id": 500611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636803.950, "dur": 12.026, + "args": { + "External id": 500612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141636817.918, "dur": 22.517, + "args": { + "External id": 500613,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636819.852, "dur": 1.376, + "args": { + "External id": 500614,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636823.766, "dur": 2.719, + "args": { + "External id": 500615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636844.227, "dur": 12.682, + "args": { + "External id": 500616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141636858.139, "dur": 11.582, + "args": { + "External id": 500617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141636879.309, "dur": 2.201, + "args": { + "External id": 500618,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141636891.149, "dur": 4.222, + "args": { + "External id": 500619,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141636893.952, "dur": 0.569, + "args": { + "External id": 500620,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141636967.400, "dur": 59.300, + "args": { + "External id": 500621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141637031.815, "dur": 5.282, + "args": { + "External id": 500622,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637035.250, "dur": 0.770, + "args": { + "External id": 500623,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637040.649, "dur": 72.942, + "args": { + "External id": 500624,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141637120.982, "dur": 7.310, + "args": { + "External id": 500625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141637122.730, "dur": 4.625, + "args": { + "External id": 500626,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637125.548, "dur": 1.594, + "args": { + "External id": 500627,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141637131.571, "dur": 50.356, + "args": { + "External id": 500628,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141637133.152, "dur": 48.058, + "args": { + "External id": 500629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637186.547, "dur": 18.631, + "args": { + "External id": 500630,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141637214.187, "dur": 4.815, + "args": { + "External id": 500631,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637217.009, "dur": 0.986, + "args": { + "External id": 500632,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141637223.675, "dur": 52.131, + "args": { + "External id": 500633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141637224.688, "dur": 7.120, + "args": { + "External id": 500634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141637225.775, "dur": 5.450, + "args": { + "External id": 500635,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637227.297, "dur": 3.796, + "args": { + "External id": 500636,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141637234.629, "dur": 40.726, + "args": { + "External id": 500637,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141637235.730, "dur": 39.085, + "args": { + "External id": 500638,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141637280.029, "dur": 3.768, + "args": { + "External id": 500639,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637282.022, "dur": 0.495, + "args": { + "External id": 500640,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141637290.794, "dur": 1.585, + "args": { + "External id": 500641,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141637301.374, "dur": 9.603, + "args": { + "External id": 500642,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141637306.062, "dur": 4.631, + "args": { + "External id": 500643,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141637408.127, "dur": 219.725, + "args": { + "External id": 500644,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141637412.738, "dur": 1.817, + "args": { + "External id": 500645,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141637416.717, "dur": 210.711, + "args": { + "External id": 500646,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141637418.320, "dur": 0.465, + "args": { + "External id": 500647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141637419.948, "dur": 23.166, + "args": { + "External id": 500648,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141637444.743, "dur": 5.356, + "args": { + "External id": 500649,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637449.181, "dur": 0.730, + "args": { + "External id": 500650,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141637451.296, "dur": 24.563, + "args": { + "External id": 500651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141637454.501, "dur": 1.012, + "args": { + "External id": 500652,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141637456.863, "dur": 18.762, + "args": { + "External id": 500653,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637459.735, "dur": 2.931, + "args": { + "External id": 500654,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141637477.376, "dur": 23.329, + "args": { + "External id": 500655,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637502.469, "dur": 14.750, + "args": { + "External id": 500656,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141637520.321, "dur": 14.423, + "args": { + "External id": 500657,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637536.352, "dur": 14.526, + "args": { + "External id": 500658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141637552.458, "dur": 25.427, + "args": { + "External id": 500659,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637556.471, "dur": 1.359, + "args": { + "External id": 500660,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637561.846, "dur": 0.824, + "args": { + "External id": 500661,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637579.343, "dur": 30.574, + "args": { + "External id": 500662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637611.970, "dur": 14.044, + "args": { + "External id": 500663,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141637636.970, "dur": 2.773, + "args": { + "External id": 500664,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141637649.523, "dur": 3.959, + "args": { + "External id": 500665,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637651.807, "dur": 0.607, + "args": { + "External id": 500666,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141637723.799, "dur": 56.347, + "args": { + "External id": 500667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141637787.707, "dur": 4.988, + "args": { + "External id": 500668,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637790.663, "dur": 0.872, + "args": { + "External id": 500669,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637794.283, "dur": 24.763, + "args": { + "External id": 500670,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141637823.636, "dur": 6.157, + "args": { + "External id": 500671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141637825.528, "dur": 3.531, + "args": { + "External id": 500672,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637827.306, "dur": 1.581, + "args": { + "External id": 500673,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141637834.314, "dur": 43.672, + "args": { + "External id": 500674,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141637835.316, "dur": 42.133, + "args": { + "External id": 500675,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141637881.817, "dur": 15.354, + "args": { + "External id": 500676,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141637902.367, "dur": 3.891, + "args": { + "External id": 500677,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637904.767, "dur": 0.662, + "args": { + "External id": 500678,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141637910.332, "dur": 46.773, + "args": { + "External id": 500679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141637911.361, "dur": 5.550, + "args": { + "External id": 500680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141637914.761, "dur": 1.618, + "args": { + "External id": 500681,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637915.861, "dur": 0.387, + "args": { + "External id": 500682,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141637917.658, "dur": 39.128, + "args": { + "External id": 500683,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141637918.151, "dur": 38.117, + "args": { + "External id": 500684,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141637960.966, "dur": 5.928, + "args": { + "External id": 500685,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141637963.236, "dur": 2.398, + "args": { + "External id": 500686,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141637972.368, "dur": 1.483, + "args": { + "External id": 500687,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141637987.381, "dur": 7.905, + "args": { + "External id": 500688,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141637989.613, "dur": 5.417, + "args": { + "External id": 500689,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141638121.008, "dur": 193.024, + "args": { + "External id": 500690,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141638123.581, "dur": 3.087, + "args": { + "External id": 500691,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141638128.428, "dur": 185.080, + "args": { + "External id": 500692,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141638130.200, "dur": 0.660, + "args": { + "External id": 500693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141638134.159, "dur": 25.579, + "args": { + "External id": 500694,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141638161.423, "dur": 3.545, + "args": { + "External id": 500695,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638163.535, "dur": 1.133, + "args": { + "External id": 500696,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141638165.828, "dur": 25.213, + "args": { + "External id": 500697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141638167.051, "dur": 1.257, + "args": { + "External id": 500698,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141638169.338, "dur": 21.446, + "args": { + "External id": 500699,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638174.355, "dur": 2.321, + "args": { + "External id": 500700,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141638192.289, "dur": 22.820, + "args": { + "External id": 500701,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638216.683, "dur": 14.418, + "args": { + "External id": 500702,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141638233.992, "dur": 12.898, + "args": { + "External id": 500703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638248.537, "dur": 12.119, + "args": { + "External id": 500704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141638267.032, "dur": 19.486, + "args": { + "External id": 500705,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638269.105, "dur": 1.601, + "args": { + "External id": 500706,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638272.668, "dur": 0.692, + "args": { + "External id": 500707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638287.583, "dur": 11.773, + "args": { + "External id": 500708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638300.596, "dur": 11.950, + "args": { + "External id": 500709,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141638321.900, "dur": 2.194, + "args": { + "External id": 500710,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141638333.615, "dur": 3.622, + "args": { + "External id": 500711,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638335.982, "dur": 0.450, + "args": { + "External id": 500712,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141638406.895, "dur": 57.088, + "args": { + "External id": 500713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141638469.245, "dur": 6.933, + "args": { + "External id": 500714,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638472.023, "dur": 2.954, + "args": { + "External id": 500715,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638477.727, "dur": 23.250, + "args": { + "External id": 500716,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141638505.744, "dur": 7.573, + "args": { + "External id": 500717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141638507.495, "dur": 5.028, + "args": { + "External id": 500718,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638511.546, "dur": 0.715, + "args": { + "External id": 500719,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141638515.739, "dur": 41.891, + "args": { + "External id": 500720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141638517.101, "dur": 39.833, + "args": { + "External id": 500721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638561.535, "dur": 14.334, + "args": { + "External id": 500722,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141638581.243, "dur": 20.295, + "args": { + "External id": 500723,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638598.927, "dur": 0.971, + "args": { + "External id": 500724,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141638607.208, "dur": 51.635, + "args": { + "External id": 500725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141638610.951, "dur": 3.788, + "args": { + "External id": 500726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141638612.000, "dur": 2.145, + "args": { + "External id": 500727,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638613.298, "dur": 0.728, + "args": { + "External id": 500728,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141638615.498, "dur": 43.006, + "args": { + "External id": 500729,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141638616.101, "dur": 41.790, + "args": { + "External id": 500730,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141638663.020, "dur": 5.453, + "args": { + "External id": 500731,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638665.090, "dur": 2.232, + "args": { + "External id": 500732,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141638676.407, "dur": 1.623, + "args": { + "External id": 500733,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141638686.431, "dur": 5.970, + "args": { + "External id": 500734,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141638688.515, "dur": 3.621, + "args": { + "External id": 500735,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141638773.935, "dur": 212.551, + "args": { + "External id": 500736,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141638797.391, "dur": 4.502, + "args": { + "External id": 500737,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141638803.502, "dur": 182.320, + "args": { + "External id": 500738,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141638806.611, "dur": 0.368, + "args": { + "External id": 500739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141638807.961, "dur": 21.607, + "args": { + "External id": 500740,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141638831.235, "dur": 4.856, + "args": { + "External id": 500741,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638835.321, "dur": 0.571, + "args": { + "External id": 500742,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141638837.011, "dur": 19.736, + "args": { + "External id": 500743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141638837.821, "dur": 1.114, + "args": { + "External id": 500744,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141638840.292, "dur": 16.139, + "args": { + "External id": 500745,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638842.902, "dur": 2.487, + "args": { + "External id": 500746,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141638858.088, "dur": 24.667, + "args": { + "External id": 500747,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638884.415, "dur": 13.458, + "args": { + "External id": 500748,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141638902.649, "dur": 14.470, + "args": { + "External id": 500749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638918.584, "dur": 13.983, + "args": { + "External id": 500750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141638934.040, "dur": 21.865, + "args": { + "External id": 500751,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638937.304, "dur": 1.237, + "args": { + "External id": 500752,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141638940.321, "dur": 0.534, + "args": { + "External id": 500753,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638957.087, "dur": 12.660, + "args": { + "External id": 500754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141638973.117, "dur": 11.781, + "args": { + "External id": 500755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141638993.509, "dur": 1.772, + "args": { + "External id": 500756,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141639003.804, "dur": 3.038, + "args": { + "External id": 500757,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639005.737, "dur": 0.310, + "args": { + "External id": 500758,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141639119.257, "dur": 60.714, + "args": { + "External id": 500759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141639186.282, "dur": 5.646, + "args": { + "External id": 500760,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639189.756, "dur": 0.764, + "args": { + "External id": 500761,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639193.538, "dur": 25.086, + "args": { + "External id": 500762,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141639226.063, "dur": 5.275, + "args": { + "External id": 500763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141639227.494, "dur": 3.110, + "args": { + "External id": 500764,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639229.189, "dur": 1.235, + "args": { + "External id": 500765,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141639233.973, "dur": 42.042, + "args": { + "External id": 500766,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141639235.011, "dur": 40.256, + "args": { + "External id": 500767,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639279.985, "dur": 16.357, + "args": { + "External id": 500768,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141639301.728, "dur": 5.868, + "args": { + "External id": 500769,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639306.096, "dur": 0.548, + "args": { + "External id": 500770,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141639311.752, "dur": 45.273, + "args": { + "External id": 500771,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141639312.614, "dur": 2.948, + "args": { + "External id": 500772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141639313.323, "dur": 1.679, + "args": { + "External id": 500773,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639314.277, "dur": 0.590, + "args": { + "External id": 500774,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141639316.222, "dur": 40.448, + "args": { + "External id": 500775,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141639318.820, "dur": 37.255, + "args": { + "External id": 500776,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141639360.774, "dur": 3.366, + "args": { + "External id": 500777,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639362.556, "dur": 0.484, + "args": { + "External id": 500778,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141639370.230, "dur": 1.522, + "args": { + "External id": 500779,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141639379.745, "dur": 7.730, + "args": { + "External id": 500780,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141639381.651, "dur": 5.549, + "args": { + "External id": 500781,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141639473.882, "dur": 206.926, + "args": { + "External id": 500782,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141639478.063, "dur": 1.904, + "args": { + "External id": 500783,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141639481.760, "dur": 198.526, + "args": { + "External id": 500784,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141639482.973, "dur": 0.543, + "args": { + "External id": 500785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141639484.573, "dur": 20.811, + "args": { + "External id": 500786,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141639506.928, "dur": 5.128, + "args": { + "External id": 500787,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639510.747, "dur": 0.947, + "args": { + "External id": 500788,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141639512.803, "dur": 24.025, + "args": { + "External id": 500789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141639513.816, "dur": 1.224, + "args": { + "External id": 500790,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141639518.505, "dur": 18.008, + "args": { + "External id": 500791,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639520.850, "dur": 2.230, + "args": { + "External id": 500792,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141639538.046, "dur": 19.498, + "args": { + "External id": 500793,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639558.816, "dur": 18.198, + "args": { + "External id": 500794,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141639579.265, "dur": 30.273, + "args": { + "External id": 500795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639611.921, "dur": 14.685, + "args": { + "External id": 500796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141639628.274, "dur": 21.181, + "args": { + "External id": 500797,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639630.307, "dur": 1.645, + "args": { + "External id": 500798,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639633.840, "dur": 0.761, + "args": { + "External id": 500799,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639653.341, "dur": 13.331, + "args": { + "External id": 500800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639667.783, "dur": 11.416, + "args": { + "External id": 500801,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141639688.520, "dur": 2.055, + "args": { + "External id": 500802,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141639699.628, "dur": 3.321, + "args": { + "External id": 500803,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639701.807, "dur": 0.342, + "args": { + "External id": 500804,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141639767.429, "dur": 56.683, + "args": { + "External id": 500805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141639828.991, "dur": 4.558, + "args": { + "External id": 500806,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639831.976, "dur": 0.560, + "args": { + "External id": 500807,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639837.136, "dur": 23.172, + "args": { + "External id": 500808,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141639864.478, "dur": 5.145, + "args": { + "External id": 500809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141639865.919, "dur": 3.061, + "args": { + "External id": 500810,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639867.708, "dur": 1.148, + "args": { + "External id": 500811,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141639872.052, "dur": 41.434, + "args": { + "External id": 500812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141639873.288, "dur": 39.560, + "args": { + "External id": 500813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639917.303, "dur": 15.714, + "args": { + "External id": 500814,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141639940.428, "dur": 29.655, + "args": { + "External id": 500815,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141639942.777, "dur": 26.774, + "args": { + "External id": 500816,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639947.183, "dur": 4.248, + "args": { + "External id": 500817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141639975.617, "dur": 28.174, + "args": { + "External id": 500818,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141639977.337, "dur": 26.200, + "args": { + "External id": 500819,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 14999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141639982.277, "dur": 3.740, + "args": { + "External id": 500820,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141639987.219, "dur": 15.746, + "args": { + "External id": 500821,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141640018.262, "dur": 7.795, + "args": { + "External id": 500822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141640022.529, "dur": 3.221, + "args": { + "External id": 500823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141640027.252, "dur": 1.079, + "args": { + "External id": 500824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141640027.538, "dur": 0.720, + "args": { + "External id": 500825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640106.258, "dur": 44.581, + "args": { + "External id": 500826,"Sequence number": 5516211, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15006 + } + }, + { + "ph": "s", "id": 23, "pid": 2338711, "tid": 2338711, "ts": 6274141640106.258, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141640157.843, "dur": 7.293, + "args": { + "External id": 500827,"Sequence number": 5516212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640162.032, "dur": 1.327, + "args": { + "External id": 500828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141640167.595, "dur": 7.859, + "args": { + "External id": 500829,"Sequence number": 5516212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640173.735, "dur": 0.454, + "args": { + "External id": 500830,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141640176.712, "dur": 2.333, + "args": { + "External id": 500831,"Sequence number": 5516212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640178.045, "dur": 0.407, + "args": { + "External id": 500832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141640183.456, "dur": 6.109, + "args": { + "External id": 500833,"Sequence number": 5516212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15013 + } + }, + { + "ph": "s", "id": 22, "pid": 2338711, "tid": 2338711, "ts": 6274141640183.456, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640186.872, "dur": 1.009, + "args": { + "External id": 500834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141640192.859, "dur": 3.913, + "args": { + "External id": 500835,"Sequence number": 5516213, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15015 + } + }, + { + "ph": "s", "id": 21, "pid": 2338711, "tid": 2338711, "ts": 6274141640192.859, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640195.603, "dur": 0.457, + "args": { + "External id": 500836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141640197.626, "dur": 4.917, + "args": { + "External id": 500837,"Sequence number": 5516214, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15017 + } + }, + { + "ph": "s", "id": 20, "pid": 2338711, "tid": 2338711, "ts": 6274141640197.626, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640201.012, "dur": 0.705, + "args": { + "External id": 500838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141640203.632, "dur": 6.384, + "args": { + "External id": 500839,"Sequence number": 5516215, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15019 + } + }, + { + "ph": "s", "id": 19, "pid": 2338711, "tid": 2338711, "ts": 6274141640203.632, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640206.287, "dur": 3.031, + "args": { + "External id": 500840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141640214.271, "dur": 37.541, + "args": { + "External id": 500841,"Sequence number": 5516216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141640219.074, "dur": 32.509, + "args": { + "External id": 500842,"Sequence number": 5516216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141640221.694, "dur": 9.563, + "args": { + "External id": 500843,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141640226.574, "dur": 4.138, + "args": { + "External id": 500844,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640232.390, "dur": 18.796, + "args": { + "External id": 500845,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141640278.770, "dur": 3.532, + "args": { + "External id": 500846,"Sequence number": 5516216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15026 + } + }, + { + "ph": "s", "id": 18, "pid": 2338711, "tid": 2338711, "ts": 6274141640278.770, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141640284.462, "dur": 1.101, + "args": { + "External id": 500847,"Sequence number": 5516217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141640319.592, "dur": 49710.530, + "args": { + "External id": 500848,"Sequence number": 5516217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15028 + } + }, + { + "ph": "s", "id": 17, "pid": 2338711, "tid": 2338711, "ts": 6274141640319.592, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141640334.187, "dur": 28.025, + "args": { + "External id": 500849,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141640335.184, "dur": 26.844, + "args": { + "External id": 500850,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141640336.933, "dur": 5.267, + "args": { + "External id": 500851,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141640338.433, "dur": 3.466, + "args": { + "External id": 500852,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640342.957, "dur": 18.670, + "args": { + "External id": 500853,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141640378.917, "dur": 25.297, + "args": { + "External id": 500854,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141640380.162, "dur": 5.367, + "args": { + "External id": 500855,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640382.173, "dur": 3.053, + "args": { + "External id": 500856,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640387.013, "dur": 16.989, + "args": { + "External id": 500857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640388.948, "dur": 14.540, + "args": { + "External id": 500858,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141640408.039, "dur": 20.800, + "args": { + "External id": 500859,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141640408.974, "dur": 7.584, + "args": { + "External id": 500860,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640413.048, "dur": 3.293, + "args": { + "External id": 500861,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640417.053, "dur": 11.589, + "args": { + "External id": 500862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640417.561, "dur": 10.755, + "args": { + "External id": 500863,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274141640435.490, "dur": 15.357, + "args": { + "External id": 500864,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141640437.374, "dur": 2.260, + "args": { + "External id": 500865,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640440.314, "dur": 10.278, + "args": { + "External id": 500866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640441.088, "dur": 9.206, + "args": { + "External id": 500867,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274141640457.426, "dur": 23.984, + "args": { + "External id": 500868,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141640484.108, "dur": 47.179, + "args": { + "External id": 500869,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141640486.280, "dur": 44.655, + "args": { + "External id": 500870,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640490.241, "dur": 2.190, + "args": { + "External id": 500871,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141640493.946, "dur": 21.100, + "args": { + "External id": 500872,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141640495.323, "dur": 19.520, + "args": { + "External id": 500873,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141640497.987, "dur": 3.075, + "args": { + "External id": 500874,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141640502.010, "dur": 12.398, + "args": { + "External id": 500875,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274141640536.059, "dur": 43679.055, + "args": { + "External id": 500876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274141640537.317, "dur": 43676.135, + "args": { + "External id": 500877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141684228.232, "dur": 10.508, + "args": { + "External id": 500878,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684235.632, "dur": 1.120, + "args": { + "External id": 500879,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141684244.735, "dur": 110.038, + "args": { + "External id": 500880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141684246.311, "dur": 5.904, + "args": { + "External id": 500881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141684248.723, "dur": 2.684, + "args": { + "External id": 500882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684250.483, "dur": 0.629, + "args": { + "External id": 500883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141684253.645, "dur": 100.218, + "args": { + "External id": 500884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141684258.039, "dur": 95.001, + "args": { + "External id": 500885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141684358.419, "dur": 4.554, + "args": { + "External id": 500886,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684360.892, "dur": 0.687, + "args": { + "External id": 500887,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141684371.300, "dur": 2.196, + "args": { + "External id": 500888,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141684383.803, "dur": 6.426, + "args": { + "External id": 500889,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141684385.959, "dur": 3.989, + "args": { + "External id": 500890,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141684529.061, "dur": 216.415, + "args": { + "External id": 500891,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141684533.844, "dur": 4.243, + "args": { + "External id": 500892,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141684539.661, "dur": 205.182, + "args": { + "External id": 500893,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141684541.486, "dur": 0.493, + "args": { + "External id": 500894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141684543.263, "dur": 24.642, + "args": { + "External id": 500895,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141684569.716, "dur": 4.416, + "args": { + "External id": 500896,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684573.219, "dur": 0.584, + "args": { + "External id": 500897,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141684575.348, "dur": 40.495, + "args": { + "External id": 500898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141684576.810, "dur": 1.053, + "args": { + "External id": 500899,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141684581.438, "dur": 34.109, + "args": { + "External id": 500900,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141684598.168, "dur": 3.541, + "args": { + "External id": 500901,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141684617.579, "dur": 23.433, + "args": { + "External id": 500902,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141684643.673, "dur": 16.610, + "args": { + "External id": 500903,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141684663.562, "dur": 15.677, + "args": { + "External id": 500904,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141684680.813, "dur": 12.570, + "args": { + "External id": 500905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141684695.099, "dur": 23.008, + "args": { + "External id": 500906,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141684697.603, "dur": 1.520, + "args": { + "External id": 500907,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684703.187, "dur": 0.670, + "args": { + "External id": 500908,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141684719.655, "dur": 11.465, + "args": { + "External id": 500909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141684732.439, "dur": 11.319, + "args": { + "External id": 500910,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141684754.596, "dur": 2.204, + "args": { + "External id": 500911,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141684764.049, "dur": 3.937, + "args": { + "External id": 500912,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684766.263, "dur": 0.622, + "args": { + "External id": 500913,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141684845.261, "dur": 71.591, + "args": { + "External id": 500914,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141684922.529, "dur": 9.368, + "args": { + "External id": 500915,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684927.841, "dur": 1.136, + "args": { + "External id": 500916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141684933.601, "dur": 26.354, + "args": { + "External id": 500917,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141684965.731, "dur": 9.590, + "args": { + "External id": 500918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141684968.053, "dur": 6.471, + "args": { + "External id": 500919,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141684970.158, "dur": 4.126, + "args": { + "External id": 500920,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141684978.565, "dur": 47.869, + "args": { + "External id": 500921,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141684982.011, "dur": 43.763, + "args": { + "External id": 500922,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685033.685, "dur": 14.676, + "args": { + "External id": 500923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141685090.183, "dur": 5.870, + "args": { + "External id": 500924,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685093.719, "dur": 0.840, + "args": { + "External id": 500925,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141685102.002, "dur": 55.713, + "args": { + "External id": 500926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141685103.099, "dur": 6.662, + "args": { + "External id": 500927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141685106.979, "dur": 2.245, + "args": { + "External id": 500928,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685108.454, "dur": 0.618, + "args": { + "External id": 500929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141685110.517, "dur": 46.633, + "args": { + "External id": 500930,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141685111.200, "dur": 45.279, + "args": { + "External id": 500931,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141685162.628, "dur": 4.329, + "args": { + "External id": 500932,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685164.993, "dur": 0.712, + "args": { + "External id": 500933,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141685173.944, "dur": 1.734, + "args": { + "External id": 500934,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141685187.308, "dur": 8.976, + "args": { + "External id": 500935,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141685189.303, "dur": 6.690, + "args": { + "External id": 500936,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141685299.664, "dur": 189.587, + "args": { + "External id": 500937,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141685301.844, "dur": 1.750, + "args": { + "External id": 500938,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141685305.413, "dur": 183.271, + "args": { + "External id": 500939,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141685306.940, "dur": 0.395, + "args": { + "External id": 500940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141685311.034, "dur": 24.001, + "args": { + "External id": 500941,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141685336.950, "dur": 3.089, + "args": { + "External id": 500942,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685339.202, "dur": 0.596, + "args": { + "External id": 500943,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141685341.112, "dur": 24.746, + "args": { + "External id": 500944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141685342.278, "dur": 3.650, + "args": { + "External id": 500945,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141685347.087, "dur": 18.501, + "args": { + "External id": 500946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685351.853, "dur": 2.508, + "args": { + "External id": 500947,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141685367.552, "dur": 20.450, + "args": { + "External id": 500948,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685390.066, "dur": 13.590, + "args": { + "External id": 500949,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141685406.223, "dur": 12.537, + "args": { + "External id": 500950,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685420.331, "dur": 13.904, + "args": { + "External id": 500951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141685438.255, "dur": 21.909, + "args": { + "External id": 500952,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685440.559, "dur": 1.956, + "args": { + "External id": 500953,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685444.562, "dur": 0.668, + "args": { + "External id": 500954,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685461.446, "dur": 13.475, + "args": { + "External id": 500955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685476.050, "dur": 11.534, + "args": { + "External id": 500956,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141685496.455, "dur": 2.007, + "args": { + "External id": 500957,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141685507.610, "dur": 3.713, + "args": { + "External id": 500958,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685510.066, "dur": 0.373, + "args": { + "External id": 500959,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141685600.168, "dur": 61.091, + "args": { + "External id": 500960,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141685668.360, "dur": 8.011, + "args": { + "External id": 500961,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685671.722, "dur": 2.949, + "args": { + "External id": 500962,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685677.800, "dur": 27.444, + "args": { + "External id": 500963,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141685710.322, "dur": 7.602, + "args": { + "External id": 500964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141685712.106, "dur": 4.994, + "args": { + "External id": 500965,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685716.035, "dur": 0.887, + "args": { + "External id": 500966,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141685720.992, "dur": 43.552, + "args": { + "External id": 500967,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141685722.183, "dur": 41.587, + "args": { + "External id": 500968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141685768.839, "dur": 16.799, + "args": { + "External id": 500969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141685791.691, "dur": 4.108, + "args": { + "External id": 500970,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685794.345, "dur": 0.541, + "args": { + "External id": 500971,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141685800.157, "dur": 46.220, + "args": { + "External id": 500972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141685803.253, "dur": 3.591, + "args": { + "External id": 500973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141685803.878, "dur": 2.342, + "args": { + "External id": 500974,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685805.495, "dur": 0.618, + "args": { + "External id": 500975,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141685807.679, "dur": 38.340, + "args": { + "External id": 500976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141685808.163, "dur": 37.327, + "args": { + "External id": 500977,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141685858.132, "dur": 9.731, + "args": { + "External id": 500978,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141685863.844, "dur": 2.877, + "args": { + "External id": 500979,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141685876.792, "dur": 1.500, + "args": { + "External id": 500980,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141685886.893, "dur": 7.559, + "args": { + "External id": 500981,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141685888.965, "dur": 5.213, + "args": { + "External id": 500982,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141685987.875, "dur": 228.450, + "args": { + "External id": 500983,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141685989.670, "dur": 1.721, + "args": { + "External id": 500984,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141685992.820, "dur": 222.967, + "args": { + "External id": 500985,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141685996.274, "dur": 0.322, + "args": { + "External id": 500986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141685997.562, "dur": 24.683, + "args": { + "External id": 500987,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141686023.652, "dur": 3.082, + "args": { + "External id": 500988,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686025.612, "dur": 0.909, + "args": { + "External id": 500989,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141686027.753, "dur": 57.240, + "args": { + "External id": 500990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141686029.471, "dur": 1.090, + "args": { + "External id": 500991,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141686031.856, "dur": 20.537, + "args": { + "External id": 500992,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686036.411, "dur": 2.932, + "args": { + "External id": 500993,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141686087.360, "dur": 28.567, + "args": { + "External id": 500994,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686117.607, "dur": 13.893, + "args": { + "External id": 500995,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141686137.206, "dur": 13.291, + "args": { + "External id": 500996,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686151.853, "dur": 12.233, + "args": { + "External id": 500997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141686165.945, "dur": 20.582, + "args": { + "External id": 500998,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686168.465, "dur": 1.549, + "args": { + "External id": 500999,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686172.336, "dur": 0.811, + "args": { + "External id": 501000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686187.756, "dur": 10.952, + "args": { + "External id": 501001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686201.661, "dur": 13.182, + "args": { + "External id": 501002,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141686225.076, "dur": 2.418, + "args": { + "External id": 501003,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141686237.488, "dur": 3.657, + "args": { + "External id": 501004,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686239.737, "dur": 0.547, + "args": { + "External id": 501005,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141686313.873, "dur": 58.603, + "args": { + "External id": 501006,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141686377.576, "dur": 5.542, + "args": { + "External id": 501007,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686380.926, "dur": 1.196, + "args": { + "External id": 501008,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686384.852, "dur": 27.164, + "args": { + "External id": 501009,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141686419.133, "dur": 5.159, + "args": { + "External id": 501010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141686420.703, "dur": 2.920, + "args": { + "External id": 501011,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686422.464, "dur": 0.940, + "args": { + "External id": 501012,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141686427.010, "dur": 64.381, + "args": { + "External id": 501013,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141686428.436, "dur": 62.318, + "args": { + "External id": 501014,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686495.690, "dur": 37.791, + "args": { + "External id": 501015,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141686539.049, "dur": 5.894, + "args": { + "External id": 501016,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686543.559, "dur": 0.508, + "args": { + "External id": 501017,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141686549.138, "dur": 72.157, + "args": { + "External id": 501018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141686550.069, "dur": 5.660, + "args": { + "External id": 501019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141686551.078, "dur": 4.019, + "args": { + "External id": 501020,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686552.348, "dur": 2.519, + "args": { + "External id": 501021,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141686556.362, "dur": 64.408, + "args": { + "External id": 501022,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141686559.225, "dur": 60.469, + "args": { + "External id": 501023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141686627.498, "dur": 4.866, + "args": { + "External id": 501024,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686630.301, "dur": 0.590, + "args": { + "External id": 501025,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141686638.304, "dur": 1.496, + "args": { + "External id": 501026,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141686648.094, "dur": 5.956, + "args": { + "External id": 501027,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141686650.167, "dur": 3.582, + "args": { + "External id": 501028,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141686745.131, "dur": 178.068, + "args": { + "External id": 501029,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141686747.841, "dur": 4.152, + "args": { + "External id": 501030,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141686753.664, "dur": 169.122, + "args": { + "External id": 501031,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141686754.988, "dur": 0.373, + "args": { + "External id": 501032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141686758.924, "dur": 21.119, + "args": { + "External id": 501033,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141686781.900, "dur": 3.970, + "args": { + "External id": 501034,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686784.327, "dur": 1.145, + "args": { + "External id": 501035,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141686786.786, "dur": 23.957, + "args": { + "External id": 501036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141686787.720, "dur": 2.599, + "args": { + "External id": 501037,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141686794.167, "dur": 16.006, + "args": { + "External id": 501038,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686796.959, "dur": 2.101, + "args": { + "External id": 501039,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141686812.132, "dur": 19.475, + "args": { + "External id": 501040,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686833.487, "dur": 13.365, + "args": { + "External id": 501041,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141686849.486, "dur": 12.266, + "args": { + "External id": 501042,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686863.349, "dur": 11.721, + "args": { + "External id": 501043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141686876.710, "dur": 18.329, + "args": { + "External id": 501044,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686879.034, "dur": 1.498, + "args": { + "External id": 501045,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686882.478, "dur": 0.528, + "args": { + "External id": 501046,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686898.580, "dur": 10.585, + "args": { + "External id": 501047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141686910.203, "dur": 11.439, + "args": { + "External id": 501048,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141686929.226, "dur": 1.763, + "args": { + "External id": 501049,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141686939.639, "dur": 3.698, + "args": { + "External id": 501050,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141686941.683, "dur": 0.781, + "args": { + "External id": 501051,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141687006.579, "dur": 85.085, + "args": { + "External id": 501052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141687099.783, "dur": 6.392, + "args": { + "External id": 501053,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687103.466, "dur": 1.078, + "args": { + "External id": 501054,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687109.984, "dur": 25.618, + "args": { + "External id": 501055,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141687141.208, "dur": 7.687, + "args": { + "External id": 501056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141687143.168, "dur": 4.964, + "args": { + "External id": 501057,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687145.377, "dur": 2.538, + "args": { + "External id": 501058,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141687152.165, "dur": 48.047, + "args": { + "External id": 501059,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141687153.798, "dur": 45.785, + "args": { + "External id": 501060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687204.219, "dur": 15.643, + "args": { + "External id": 501061,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141687227.768, "dur": 3.847, + "args": { + "External id": 501062,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687230.199, "dur": 0.629, + "args": { + "External id": 501063,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141687235.643, "dur": 47.149, + "args": { + "External id": 501064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141687236.815, "dur": 3.475, + "args": { + "External id": 501065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141687237.760, "dur": 1.937, + "args": { + "External id": 501066,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687239.003, "dur": 0.563, + "args": { + "External id": 501067,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141687243.818, "dur": 38.647, + "args": { + "External id": 501068,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141687244.396, "dur": 37.500, + "args": { + "External id": 501069,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141687286.982, "dur": 3.770, + "args": { + "External id": 501070,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687289.024, "dur": 0.529, + "args": { + "External id": 501071,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141687297.057, "dur": 1.607, + "args": { + "External id": 501072,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141687306.848, "dur": 10.855, + "args": { + "External id": 501073,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141687311.428, "dur": 5.971, + "args": { + "External id": 501074,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141687409.940, "dur": 193.295, + "args": { + "External id": 501075,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141687412.220, "dur": 2.008, + "args": { + "External id": 501076,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141687417.881, "dur": 184.789, + "args": { + "External id": 501077,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141687419.290, "dur": 0.466, + "args": { + "External id": 501078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141687420.791, "dur": 20.761, + "args": { + "External id": 501079,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141687443.186, "dur": 4.552, + "args": { + "External id": 501080,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687445.583, "dur": 1.886, + "args": { + "External id": 501081,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141687448.693, "dur": 23.531, + "args": { + "External id": 501082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141687452.478, "dur": 1.180, + "args": { + "External id": 501083,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141687454.915, "dur": 16.973, + "args": { + "External id": 501084,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687458.046, "dur": 2.719, + "args": { + "External id": 501085,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141687473.610, "dur": 18.549, + "args": { + "External id": 501086,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687494.027, "dur": 13.088, + "args": { + "External id": 501087,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141687510.141, "dur": 12.619, + "args": { + "External id": 501088,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687524.286, "dur": 11.814, + "args": { + "External id": 501089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141687537.891, "dur": 22.537, + "args": { + "External id": 501090,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687540.143, "dur": 1.459, + "args": { + "External id": 501091,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687545.586, "dur": 2.419, + "args": { + "External id": 501092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687561.592, "dur": 11.367, + "args": { + "External id": 501093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687574.216, "dur": 26.451, + "args": { + "External id": 501094,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141687612.701, "dur": 2.283, + "args": { + "External id": 501095,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141687625.510, "dur": 3.856, + "args": { + "External id": 501096,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687627.860, "dur": 0.520, + "args": { + "External id": 501097,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141687701.343, "dur": 54.901, + "args": { + "External id": 501098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141687763.354, "dur": 4.997, + "args": { + "External id": 501099,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687766.260, "dur": 0.784, + "args": { + "External id": 501100,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687769.714, "dur": 25.235, + "args": { + "External id": 501101,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141687799.547, "dur": 5.399, + "args": { + "External id": 501102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141687801.217, "dur": 3.063, + "args": { + "External id": 501103,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687802.991, "dur": 1.127, + "args": { + "External id": 501104,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141687809.860, "dur": 40.220, + "args": { + "External id": 501105,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141687810.892, "dur": 38.691, + "args": { + "External id": 501106,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141687858.196, "dur": 14.828, + "args": { + "External id": 501107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141687878.665, "dur": 3.674, + "args": { + "External id": 501108,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687880.724, "dur": 0.741, + "args": { + "External id": 501109,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141687886.199, "dur": 47.729, + "args": { + "External id": 501110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141687887.127, "dur": 7.382, + "args": { + "External id": 501111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141687890.177, "dur": 3.738, + "args": { + "External id": 501112,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687891.435, "dur": 2.280, + "args": { + "External id": 501113,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141687895.266, "dur": 38.238, + "args": { + "External id": 501114,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141687895.906, "dur": 36.950, + "args": { + "External id": 501115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141687937.767, "dur": 4.063, + "args": { + "External id": 501116,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141687940.348, "dur": 0.301, + "args": { + "External id": 501117,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141687947.172, "dur": 1.340, + "args": { + "External id": 501118,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141687957.642, "dur": 7.994, + "args": { + "External id": 501119,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141687959.403, "dur": 5.908, + "args": { + "External id": 501120,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141688047.872, "dur": 241.315, + "args": { + "External id": 501121,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141688049.838, "dur": 1.904, + "args": { + "External id": 501122,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141688087.769, "dur": 200.848, + "args": { + "External id": 501123,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141688089.371, "dur": 0.425, + "args": { + "External id": 501124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141688093.502, "dur": 28.414, + "args": { + "External id": 501125,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141688123.689, "dur": 3.145, + "args": { + "External id": 501126,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688125.725, "dur": 0.805, + "args": { + "External id": 501127,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141688127.630, "dur": 25.241, + "args": { + "External id": 501128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141688128.897, "dur": 1.761, + "args": { + "External id": 501129,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141688131.711, "dur": 20.867, + "args": { + "External id": 501130,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688136.815, "dur": 2.356, + "args": { + "External id": 501131,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141688154.306, "dur": 24.183, + "args": { + "External id": 501132,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688180.360, "dur": 15.293, + "args": { + "External id": 501133,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141688198.701, "dur": 18.049, + "args": { + "External id": 501134,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688218.222, "dur": 14.109, + "args": { + "External id": 501135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141688236.280, "dur": 23.349, + "args": { + "External id": 501136,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688238.278, "dur": 1.781, + "args": { + "External id": 501137,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688242.124, "dur": 0.829, + "args": { + "External id": 501138,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688260.985, "dur": 13.704, + "args": { + "External id": 501139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688276.060, "dur": 11.456, + "args": { + "External id": 501140,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141688298.424, "dur": 2.521, + "args": { + "External id": 501141,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141688310.435, "dur": 3.948, + "args": { + "External id": 501142,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688312.938, "dur": 0.473, + "args": { + "External id": 501143,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141688386.733, "dur": 56.009, + "args": { + "External id": 501144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141688447.679, "dur": 5.221, + "args": { + "External id": 501145,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688450.583, "dur": 1.086, + "args": { + "External id": 501146,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688454.356, "dur": 23.955, + "args": { + "External id": 501147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141688482.744, "dur": 7.672, + "args": { + "External id": 501148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141688484.440, "dur": 5.366, + "args": { + "External id": 501149,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688488.425, "dur": 1.192, + "args": { + "External id": 501150,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141688492.793, "dur": 43.032, + "args": { + "External id": 501151,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141688494.269, "dur": 40.911, + "args": { + "External id": 501152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688539.393, "dur": 15.986, + "args": { + "External id": 501153,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141688561.319, "dur": 3.813, + "args": { + "External id": 501154,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688563.599, "dur": 0.673, + "args": { + "External id": 501155,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141688569.117, "dur": 67.519, + "args": { + "External id": 501156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141688572.683, "dur": 5.230, + "args": { + "External id": 501157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141688573.452, "dur": 3.848, + "args": { + "External id": 501158,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688574.799, "dur": 2.361, + "args": { + "External id": 501159,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141688578.927, "dur": 57.168, + "args": { + "External id": 501160,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141688579.564, "dur": 55.378, + "args": { + "External id": 501161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141688642.586, "dur": 6.580, + "args": { + "External id": 501162,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688645.309, "dur": 2.611, + "args": { + "External id": 501163,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141688657.787, "dur": 1.725, + "args": { + "External id": 501164,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141688668.139, "dur": 9.544, + "args": { + "External id": 501165,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141688673.877, "dur": 3.523, + "args": { + "External id": 501166,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141688762.660, "dur": 195.048, + "args": { + "External id": 501167,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141688766.737, "dur": 2.031, + "args": { + "External id": 501168,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141688770.268, "dur": 186.968, + "args": { + "External id": 501169,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141688773.564, "dur": 0.530, + "args": { + "External id": 501170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141688775.041, "dur": 23.186, + "args": { + "External id": 501171,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141688799.743, "dur": 4.764, + "args": { + "External id": 501172,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688803.501, "dur": 0.808, + "args": { + "External id": 501173,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141688805.316, "dur": 21.346, + "args": { + "External id": 501174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141688806.466, "dur": 0.998, + "args": { + "External id": 501175,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141688808.665, "dur": 17.729, + "args": { + "External id": 501176,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688811.285, "dur": 2.491, + "args": { + "External id": 501177,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141688827.926, "dur": 24.158, + "args": { + "External id": 501178,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688853.547, "dur": 15.129, + "args": { + "External id": 501179,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141688873.985, "dur": 15.609, + "args": { + "External id": 501180,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688890.930, "dur": 13.562, + "args": { + "External id": 501181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141688905.976, "dur": 20.607, + "args": { + "External id": 501182,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688907.818, "dur": 1.591, + "args": { + "External id": 501183,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688911.396, "dur": 0.624, + "args": { + "External id": 501184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688927.769, "dur": 13.054, + "args": { + "External id": 501185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141688944.295, "dur": 11.741, + "args": { + "External id": 501186,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141688964.148, "dur": 1.471, + "args": { + "External id": 501187,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141688974.511, "dur": 3.424, + "args": { + "External id": 501188,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141688976.513, "dur": 0.574, + "args": { + "External id": 501189,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141689037.045, "dur": 105.024, + "args": { + "External id": 501190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141689149.501, "dur": 6.513, + "args": { + "External id": 501191,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689153.527, "dur": 0.838, + "args": { + "External id": 501192,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689157.432, "dur": 27.319, + "args": { + "External id": 501193,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141689192.505, "dur": 5.285, + "args": { + "External id": 501194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141689193.917, "dur": 3.235, + "args": { + "External id": 501195,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689195.868, "dur": 1.050, + "args": { + "External id": 501196,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141689200.992, "dur": 43.096, + "args": { + "External id": 501197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141689201.904, "dur": 41.633, + "args": { + "External id": 501198,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689248.024, "dur": 16.010, + "args": { + "External id": 501199,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141689270.019, "dur": 5.794, + "args": { + "External id": 501200,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689274.579, "dur": 0.468, + "args": { + "External id": 501201,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141689280.298, "dur": 49.282, + "args": { + "External id": 501202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141689284.463, "dur": 3.600, + "args": { + "External id": 501203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141689285.325, "dur": 2.146, + "args": { + "External id": 501204,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689286.498, "dur": 0.867, + "args": { + "External id": 501205,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141689288.729, "dur": 40.470, + "args": { + "External id": 501206,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141689291.668, "dur": 36.885, + "args": { + "External id": 501207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141689333.761, "dur": 3.933, + "args": { + "External id": 501208,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689335.992, "dur": 0.646, + "args": { + "External id": 501209,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141689343.577, "dur": 1.649, + "args": { + "External id": 501210,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141689352.886, "dur": 7.939, + "args": { + "External id": 501211,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141689355.214, "dur": 5.277, + "args": { + "External id": 501212,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141689449.558, "dur": 196.839, + "args": { + "External id": 501213,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141689451.712, "dur": 4.127, + "args": { + "External id": 501214,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141689457.471, "dur": 188.382, + "args": { + "External id": 501215,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141689458.988, "dur": 0.511, + "args": { + "External id": 501216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141689460.608, "dur": 22.949, + "args": { + "External id": 501217,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141689485.404, "dur": 2.693, + "args": { + "External id": 501218,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689487.346, "dur": 0.544, + "args": { + "External id": 501219,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141689489.147, "dur": 25.022, + "args": { + "External id": 501220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141689490.394, "dur": 1.654, + "args": { + "External id": 501221,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141689495.285, "dur": 18.580, + "args": { + "External id": 501222,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689499.874, "dur": 2.221, + "args": { + "External id": 501223,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141689515.701, "dur": 18.768, + "args": { + "External id": 501224,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689536.065, "dur": 13.577, + "args": { + "External id": 501225,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141689552.013, "dur": 12.037, + "args": { + "External id": 501226,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689565.315, "dur": 11.915, + "args": { + "External id": 501227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141689578.806, "dur": 38.116, + "args": { + "External id": 501228,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689581.035, "dur": 1.828, + "args": { + "External id": 501229,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689600.095, "dur": 1.111, + "args": { + "External id": 501230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689620.555, "dur": 11.763, + "args": { + "External id": 501231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689633.302, "dur": 11.362, + "args": { + "External id": 501232,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141689654.670, "dur": 2.204, + "args": { + "External id": 501233,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141689666.503, "dur": 3.453, + "args": { + "External id": 501234,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689668.781, "dur": 0.372, + "args": { + "External id": 501235,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141689733.403, "dur": 54.052, + "args": { + "External id": 501236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141689792.270, "dur": 4.913, + "args": { + "External id": 501237,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689795.512, "dur": 0.757, + "args": { + "External id": 501238,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689800.916, "dur": 23.608, + "args": { + "External id": 501239,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141689828.646, "dur": 5.015, + "args": { + "External id": 501240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141689830.090, "dur": 2.916, + "args": { + "External id": 501241,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689832.125, "dur": 0.745, + "args": { + "External id": 501242,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141689835.970, "dur": 41.048, + "args": { + "External id": 501243,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141689837.608, "dur": 38.917, + "args": { + "External id": 501244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689880.604, "dur": 13.171, + "args": { + "External id": 501245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141689900.899, "dur": 24.632, + "args": { + "External id": 501246,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141689903.605, "dur": 21.532, + "args": { + "External id": 501247,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689910.073, "dur": 0.604, + "args": { + "External id": 501248,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141689954.960, "dur": 28.184, + "args": { + "External id": 501249,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141689956.931, "dur": 26.022, + "args": { + "External id": 501250,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141689961.778, "dur": 3.683, + "args": { + "External id": 501251,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141689966.438, "dur": 16.052, + "args": { + "External id": 501252,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141689996.245, "dur": 7.560, + "args": { + "External id": 501253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141690000.762, "dur": 2.736, + "args": { + "External id": 501254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141690004.841, "dur": 1.101, + "args": { + "External id": 501255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141690005.375, "dur": 0.481, + "args": { + "External id": 501256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690046.689, "dur": 60.956, + "args": { + "External id": 501257,"Sequence number": 5516218, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690110.831, "dur": 16.908, + "args": { + "External id": 501258,"Sequence number": 5516219, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15438 + } + }, + { + "ph": "s", "id": 16, "pid": 2338711, "tid": 2338711, "ts": 6274141690110.831, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141690134.833, "dur": 7.536, + "args": { + "External id": 501259,"Sequence number": 5516220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690139.221, "dur": 1.360, + "args": { + "External id": 501260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141690144.986, "dur": 8.133, + "args": { + "External id": 501261,"Sequence number": 5516220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690151.187, "dur": 0.622, + "args": { + "External id": 501262,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141690154.616, "dur": 2.417, + "args": { + "External id": 501263,"Sequence number": 5516220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690156.107, "dur": 0.265, + "args": { + "External id": 501264,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141690161.193, "dur": 5.930, + "args": { + "External id": 501265,"Sequence number": 5516220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15445 + } + }, + { + "ph": "s", "id": 15, "pid": 2338711, "tid": 2338711, "ts": 6274141690161.193, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690165.093, "dur": 0.697, + "args": { + "External id": 501266,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141690170.533, "dur": 6.566, + "args": { + "External id": 501267,"Sequence number": 5516221, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15447 + } + }, + { + "ph": "s", "id": 14, "pid": 2338711, "tid": 2338711, "ts": 6274141690170.533, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690175.936, "dur": 0.430, + "args": { + "External id": 501268,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141690178.142, "dur": 5.128, + "args": { + "External id": 501269,"Sequence number": 5516222, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15449 + } + }, + { + "ph": "s", "id": 13, "pid": 2338711, "tid": 2338711, "ts": 6274141690178.142, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690181.624, "dur": 0.774, + "args": { + "External id": 501270,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141690184.215, "dur": 6.430, + "args": { + "External id": 501271,"Sequence number": 5516223, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15451 + } + }, + { + "ph": "s", "id": 12, "pid": 2338711, "tid": 2338711, "ts": 6274141690184.215, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690186.948, "dur": 2.985, + "args": { + "External id": 501272,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141690194.136, "dur": 33.465, + "args": { + "External id": 501273,"Sequence number": 5516224, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141690197.931, "dur": 29.469, + "args": { + "External id": 501274,"Sequence number": 5516224, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141690200.695, "dur": 7.104, + "args": { + "External id": 501275,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141690202.988, "dur": 4.229, + "args": { + "External id": 501276,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690208.778, "dur": 18.149, + "args": { + "External id": 501277,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141690254.520, "dur": 3.967, + "args": { + "External id": 501278,"Sequence number": 5516224, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15458 + } + }, + { + "ph": "s", "id": 11, "pid": 2338711, "tid": 2338711, "ts": 6274141690254.520, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141690260.787, "dur": 0.973, + "args": { + "External id": 501279,"Sequence number": 5516225, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141690298.064, "dur": 49011.242, + "args": { + "External id": 501280,"Sequence number": 5516225, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15460 + } + }, + { + "ph": "s", "id": 10, "pid": 2338711, "tid": 2338711, "ts": 6274141690298.064, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141690313.477, "dur": 25.461, + "args": { + "External id": 501281,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141690314.162, "dur": 24.592, + "args": { + "External id": 501282,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141690315.241, "dur": 5.195, + "args": { + "External id": 501283,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141690316.558, "dur": 3.307, + "args": { + "External id": 501284,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690321.269, "dur": 16.902, + "args": { + "External id": 501285,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141690355.155, "dur": 26.741, + "args": { + "External id": 501286,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141690356.357, "dur": 5.879, + "args": { + "External id": 501287,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690358.213, "dur": 3.706, + "args": { + "External id": 501288,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690363.496, "dur": 18.125, + "args": { + "External id": 501289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690367.489, "dur": 13.693, + "args": { + "External id": 501290,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141690385.586, "dur": 19.011, + "args": { + "External id": 501291,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141690386.445, "dur": 4.511, + "args": { + "External id": 501292,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690387.881, "dur": 2.856, + "args": { + "External id": 501293,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690391.496, "dur": 12.907, + "args": { + "External id": 501294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690392.114, "dur": 11.989, + "args": { + "External id": 501295,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274141690410.951, "dur": 20.076, + "args": { + "External id": 501296,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141690412.641, "dur": 2.331, + "args": { + "External id": 501297,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690415.508, "dur": 15.126, + "args": { + "External id": 501298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690420.308, "dur": 10.014, + "args": { + "External id": 501299,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274141690436.146, "dur": 23.106, + "args": { + "External id": 501300,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141690462.171, "dur": 51.712, + "args": { + "External id": 501301,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141690464.142, "dur": 49.340, + "args": { + "External id": 501302,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690468.737, "dur": 0.717, + "args": { + "External id": 501303,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141690471.060, "dur": 23.808, + "args": { + "External id": 501304,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141690472.538, "dur": 22.091, + "args": { + "External id": 501305,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141690477.728, "dur": 2.865, + "args": { + "External id": 501306,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141690481.234, "dur": 12.975, + "args": { + "External id": 501307,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274141690520.714, "dur": 43003.293, + "args": { + "External id": 501308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274141690522.395, "dur": 43000.399, + "args": { + "External id": 501309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141733536.608, "dur": 7.106, + "args": { + "External id": 501310,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141733540.717, "dur": 1.439, + "args": { + "External id": 501311,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141733548.947, "dur": 117.532, + "args": { + "External id": 501312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141733550.876, "dur": 6.128, + "args": { + "External id": 501313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141733553.326, "dur": 2.883, + "args": { + "External id": 501314,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141733555.434, "dur": 0.508, + "args": { + "External id": 501315,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141733560.384, "dur": 105.503, + "args": { + "External id": 501316,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141733562.182, "dur": 102.408, + "args": { + "External id": 501317,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141733671.879, "dur": 5.484, + "args": { + "External id": 501318,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141733674.808, "dur": 0.832, + "args": { + "External id": 501319,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141733685.390, "dur": 2.364, + "args": { + "External id": 501320,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141733697.569, "dur": 8.953, + "args": { + "External id": 501321,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141733701.841, "dur": 4.368, + "args": { + "External id": 501322,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141733843.314, "dur": 245.317, + "args": { + "External id": 501323,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141733847.205, "dur": 2.414, + "args": { + "External id": 501324,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141733851.543, "dur": 236.497, + "args": { + "External id": 501325,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141733853.349, "dur": 0.620, + "args": { + "External id": 501326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141733857.738, "dur": 28.616, + "args": { + "External id": 501327,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141733888.069, "dur": 3.987, + "args": { + "External id": 501328,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141733891.158, "dur": 0.584, + "args": { + "External id": 501329,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141733893.204, "dur": 29.539, + "args": { + "External id": 501330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141733896.550, "dur": 3.176, + "args": { + "External id": 501331,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141733900.910, "dur": 21.459, + "args": { + "External id": 501332,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141733904.922, "dur": 3.048, + "args": { + "External id": 501333,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141733924.638, "dur": 22.606, + "args": { + "External id": 501334,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141733949.319, "dur": 16.780, + "args": { + "External id": 501335,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141733969.407, "dur": 15.241, + "args": { + "External id": 501336,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141733986.517, "dur": 15.350, + "args": { + "External id": 501337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141734003.834, "dur": 23.761, + "args": { + "External id": 501338,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734006.151, "dur": 1.411, + "args": { + "External id": 501339,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734012.075, "dur": 0.449, + "args": { + "External id": 501340,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734028.897, "dur": 14.232, + "args": { + "External id": 501341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734044.758, "dur": 40.969, + "args": { + "External id": 501342,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141734097.868, "dur": 2.824, + "args": { + "External id": 501343,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141734108.207, "dur": 4.330, + "args": { + "External id": 501344,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734110.804, "dur": 0.735, + "args": { + "External id": 501345,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141734196.728, "dur": 73.768, + "args": { + "External id": 501346,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141734279.519, "dur": 6.305, + "args": { + "External id": 501347,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734282.347, "dur": 0.764, + "args": { + "External id": 501348,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734287.566, "dur": 32.035, + "args": { + "External id": 501349,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141734325.277, "dur": 7.994, + "args": { + "External id": 501350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141734327.274, "dur": 5.103, + "args": { + "External id": 501351,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734329.683, "dur": 2.408, + "args": { + "External id": 501352,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141734338.633, "dur": 48.967, + "args": { + "External id": 501353,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141734340.047, "dur": 46.526, + "args": { + "External id": 501354,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734391.668, "dur": 17.873, + "args": { + "External id": 501355,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141734416.514, "dur": 4.005, + "args": { + "External id": 501356,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734418.947, "dur": 0.642, + "args": { + "External id": 501357,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141734425.156, "dur": 47.752, + "args": { + "External id": 501358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141734426.241, "dur": 6.156, + "args": { + "External id": 501359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141734429.361, "dur": 2.456, + "args": { + "External id": 501360,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734431.108, "dur": 0.541, + "args": { + "External id": 501361,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141734433.261, "dur": 39.276, + "args": { + "External id": 501362,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141734433.726, "dur": 38.218, + "args": { + "External id": 501363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141734477.411, "dur": 3.756, + "args": { + "External id": 501364,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734479.394, "dur": 0.654, + "args": { + "External id": 501365,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141734488.835, "dur": 1.658, + "args": { + "External id": 501366,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141734498.598, "dur": 6.879, + "args": { + "External id": 501367,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141734500.908, "dur": 4.214, + "args": { + "External id": 501368,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141734620.298, "dur": 203.213, + "args": { + "External id": 501369,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141734622.982, "dur": 3.617, + "args": { + "External id": 501370,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141734630.293, "dur": 192.613, + "args": { + "External id": 501371,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141734634.140, "dur": 0.479, + "args": { + "External id": 501372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141734636.303, "dur": 23.584, + "args": { + "External id": 501373,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141734661.482, "dur": 7.086, + "args": { + "External id": 501374,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734664.117, "dur": 4.159, + "args": { + "External id": 501375,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141734669.566, "dur": 22.039, + "args": { + "External id": 501376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141734670.533, "dur": 1.156, + "args": { + "External id": 501377,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141734673.050, "dur": 18.233, + "args": { + "External id": 501378,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734676.226, "dur": 2.120, + "args": { + "External id": 501379,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141734693.056, "dur": 23.197, + "args": { + "External id": 501380,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734717.762, "dur": 14.327, + "args": { + "External id": 501381,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141734737.010, "dur": 15.698, + "args": { + "External id": 501382,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734754.307, "dur": 13.117, + "args": { + "External id": 501383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141734769.603, "dur": 23.657, + "args": { + "External id": 501384,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734771.910, "dur": 2.059, + "args": { + "External id": 501385,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734775.961, "dur": 2.280, + "args": { + "External id": 501386,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734794.631, "dur": 13.273, + "args": { + "External id": 501387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734809.045, "dur": 12.717, + "args": { + "External id": 501388,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141734832.524, "dur": 2.400, + "args": { + "External id": 501389,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141734845.388, "dur": 3.799, + "args": { + "External id": 501390,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734847.972, "dur": 0.507, + "args": { + "External id": 501391,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141734920.656, "dur": 57.353, + "args": { + "External id": 501392,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141734983.195, "dur": 4.853, + "args": { + "External id": 501393,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141734986.373, "dur": 0.609, + "args": { + "External id": 501394,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141734989.459, "dur": 26.915, + "args": { + "External id": 501395,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141735021.210, "dur": 7.992, + "args": { + "External id": 501396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141735022.824, "dur": 5.643, + "args": { + "External id": 501397,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735027.263, "dur": 1.015, + "args": { + "External id": 501398,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141735031.686, "dur": 79.243, + "args": { + "External id": 501399,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141735032.859, "dur": 77.014, + "args": { + "External id": 501400,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735117.014, "dur": 18.726, + "args": { + "External id": 501401,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141735145.236, "dur": 4.162, + "args": { + "External id": 501402,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735147.653, "dur": 0.669, + "args": { + "External id": 501403,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141735155.856, "dur": 49.190, + "args": { + "External id": 501404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141735156.922, "dur": 3.974, + "args": { + "External id": 501405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141735157.909, "dur": 2.373, + "args": { + "External id": 501406,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735159.437, "dur": 0.705, + "args": { + "External id": 501407,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141735161.698, "dur": 42.831, + "args": { + "External id": 501408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141735162.534, "dur": 41.404, + "args": { + "External id": 501409,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141735209.446, "dur": 5.255, + "args": { + "External id": 501410,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735211.260, "dur": 2.363, + "args": { + "External id": 501411,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141735223.762, "dur": 1.474, + "args": { + "External id": 501412,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141735234.354, "dur": 7.730, + "args": { + "External id": 501413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141735236.442, "dur": 5.321, + "args": { + "External id": 501414,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141735337.339, "dur": 189.890, + "args": { + "External id": 501415,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141735339.741, "dur": 2.207, + "args": { + "External id": 501416,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141735343.517, "dur": 183.150, + "args": { + "External id": 501417,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141735347.311, "dur": 0.423, + "args": { + "External id": 501418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141735348.795, "dur": 23.343, + "args": { + "External id": 501419,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141735373.828, "dur": 3.273, + "args": { + "External id": 501420,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735376.202, "dur": 0.636, + "args": { + "External id": 501421,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141735377.926, "dur": 22.093, + "args": { + "External id": 501422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141735378.785, "dur": 1.337, + "args": { + "External id": 501423,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141735381.468, "dur": 18.168, + "args": { + "External id": 501424,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735386.205, "dur": 2.857, + "args": { + "External id": 501425,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141735401.437, "dur": 23.324, + "args": { + "External id": 501426,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735426.356, "dur": 13.746, + "args": { + "External id": 501427,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141735445.576, "dur": 12.201, + "args": { + "External id": 501428,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735459.379, "dur": 12.614, + "args": { + "External id": 501429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141735473.628, "dur": 18.893, + "args": { + "External id": 501430,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735475.608, "dur": 1.714, + "args": { + "External id": 501431,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735479.499, "dur": 0.645, + "args": { + "External id": 501432,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735493.841, "dur": 14.715, + "args": { + "External id": 501433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735512.310, "dur": 13.146, + "args": { + "External id": 501434,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141735534.350, "dur": 1.762, + "args": { + "External id": 501435,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141735544.954, "dur": 3.194, + "args": { + "External id": 501436,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735547.041, "dur": 0.351, + "args": { + "External id": 501437,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141735638.698, "dur": 60.774, + "args": { + "External id": 501438,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141735705.260, "dur": 5.601, + "args": { + "External id": 501439,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735708.502, "dur": 0.908, + "args": { + "External id": 501440,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735712.498, "dur": 35.259, + "args": { + "External id": 501441,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141735755.617, "dur": 5.429, + "args": { + "External id": 501442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141735757.278, "dur": 3.027, + "args": { + "External id": 501443,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735759.280, "dur": 0.821, + "args": { + "External id": 501444,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141735763.875, "dur": 74.115, + "args": { + "External id": 501445,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141735765.038, "dur": 72.467, + "args": { + "External id": 501446,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141735842.126, "dur": 16.466, + "args": { + "External id": 501447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141735864.837, "dur": 6.381, + "args": { + "External id": 501448,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735869.698, "dur": 0.600, + "args": { + "External id": 501449,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141735875.081, "dur": 51.348, + "args": { + "External id": 501450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141735876.174, "dur": 3.590, + "args": { + "External id": 501451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141735876.879, "dur": 2.341, + "args": { + "External id": 501452,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735878.519, "dur": 0.562, + "args": { + "External id": 501453,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141735880.426, "dur": 45.682, + "args": { + "External id": 501454,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141735886.572, "dur": 38.838, + "args": { + "External id": 501455,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141735930.593, "dur": 5.764, + "args": { + "External id": 501456,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141735932.412, "dur": 2.779, + "args": { + "External id": 501457,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141735942.375, "dur": 1.701, + "args": { + "External id": 501458,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141735952.392, "dur": 7.729, + "args": { + "External id": 501459,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141735956.113, "dur": 3.767, + "args": { + "External id": 501460,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141736049.378, "dur": 237.459, + "args": { + "External id": 501461,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141736051.114, "dur": 39.939, + "args": { + "External id": 501462,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141736094.326, "dur": 192.120, + "args": { + "External id": 501463,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141736095.848, "dur": 0.624, + "args": { + "External id": 501464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141736097.425, "dur": 27.513, + "args": { + "External id": 501465,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141736126.720, "dur": 3.829, + "args": { + "External id": 501466,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736129.243, "dur": 1.020, + "args": { + "External id": 501467,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141736131.730, "dur": 27.896, + "args": { + "External id": 501468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141736135.811, "dur": 1.518, + "args": { + "External id": 501469,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141736138.846, "dur": 20.488, + "args": { + "External id": 501470,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736143.594, "dur": 2.561, + "args": { + "External id": 501471,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141736161.235, "dur": 21.965, + "args": { + "External id": 501472,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736185.106, "dur": 14.333, + "args": { + "External id": 501473,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141736202.055, "dur": 14.559, + "args": { + "External id": 501474,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736218.422, "dur": 13.244, + "args": { + "External id": 501475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141736233.342, "dur": 23.721, + "args": { + "External id": 501476,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736235.824, "dur": 1.745, + "args": { + "External id": 501477,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736241.725, "dur": 0.616, + "args": { + "External id": 501478,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736258.409, "dur": 13.387, + "args": { + "External id": 501479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736273.055, "dur": 12.307, + "args": { + "External id": 501480,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141736295.392, "dur": 2.409, + "args": { + "External id": 501481,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141736308.022, "dur": 3.953, + "args": { + "External id": 501482,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736310.733, "dur": 0.387, + "args": { + "External id": 501483,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141736381.524, "dur": 57.043, + "args": { + "External id": 501484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141736443.798, "dur": 7.456, + "args": { + "External id": 501485,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736449.095, "dur": 0.845, + "args": { + "External id": 501486,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736453.129, "dur": 26.337, + "args": { + "External id": 501487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141736483.815, "dur": 5.364, + "args": { + "External id": 501488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141736485.573, "dur": 2.973, + "args": { + "External id": 501489,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736487.853, "dur": 0.519, + "args": { + "External id": 501490,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141736491.889, "dur": 40.690, + "args": { + "External id": 501491,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141736492.916, "dur": 39.177, + "args": { + "External id": 501492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736536.445, "dur": 13.968, + "args": { + "External id": 501493,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141736558.335, "dur": 3.769, + "args": { + "External id": 501494,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736560.566, "dur": 0.671, + "args": { + "External id": 501495,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141736566.022, "dur": 69.943, + "args": { + "External id": 501496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141736567.412, "dur": 7.904, + "args": { + "External id": 501497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141736568.300, "dur": 6.381, + "args": { + "External id": 501498,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736573.959, "dur": 0.606, + "args": { + "External id": 501499,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141736575.926, "dur": 59.533, + "args": { + "External id": 501500,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141736576.707, "dur": 57.701, + "args": { + "External id": 501501,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141736642.379, "dur": 4.596, + "args": { + "External id": 501502,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736644.974, "dur": 0.708, + "args": { + "External id": 501503,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141736652.909, "dur": 1.788, + "args": { + "External id": 501504,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141736662.630, "dur": 8.155, + "args": { + "External id": 501505,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141736666.786, "dur": 3.729, + "args": { + "External id": 501506,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141736754.190, "dur": 173.219, + "args": { + "External id": 501507,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141736756.529, "dur": 1.734, + "args": { + "External id": 501508,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141736760.102, "dur": 166.977, + "args": { + "External id": 501509,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141736763.439, "dur": 0.375, + "args": { + "External id": 501510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141736764.959, "dur": 20.252, + "args": { + "External id": 501511,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141736786.705, "dur": 3.097, + "args": { + "External id": 501512,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736788.717, "dur": 0.749, + "args": { + "External id": 501513,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141736790.706, "dur": 24.574, + "args": { + "External id": 501514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141736796.618, "dur": 1.113, + "args": { + "External id": 501515,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141736798.815, "dur": 16.211, + "args": { + "External id": 501516,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736801.371, "dur": 2.579, + "args": { + "External id": 501517,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141736816.500, "dur": 20.364, + "args": { + "External id": 501518,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736838.507, "dur": 12.489, + "args": { + "External id": 501519,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141736853.790, "dur": 12.797, + "args": { + "External id": 501520,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736867.926, "dur": 11.907, + "args": { + "External id": 501521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141736881.342, "dur": 19.935, + "args": { + "External id": 501522,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736883.549, "dur": 1.098, + "args": { + "External id": 501523,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736888.427, "dur": 0.653, + "args": { + "External id": 501524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736902.527, "dur": 11.068, + "args": { + "External id": 501525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141736914.560, "dur": 11.476, + "args": { + "External id": 501526,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141736933.744, "dur": 1.670, + "args": { + "External id": 501527,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141736943.221, "dur": 3.660, + "args": { + "External id": 501528,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141736945.598, "dur": 0.510, + "args": { + "External id": 501529,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141737006.516, "dur": 81.778, + "args": { + "External id": 501530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141737098.097, "dur": 7.767, + "args": { + "External id": 501531,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737101.638, "dur": 2.567, + "args": { + "External id": 501532,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737107.335, "dur": 27.737, + "args": { + "External id": 501533,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141737140.274, "dur": 5.211, + "args": { + "External id": 501534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141737142.169, "dur": 2.636, + "args": { + "External id": 501535,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737143.937, "dur": 0.655, + "args": { + "External id": 501536,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141737150.653, "dur": 46.178, + "args": { + "External id": 501537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141737151.855, "dur": 44.376, + "args": { + "External id": 501538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737200.763, "dur": 15.184, + "args": { + "External id": 501539,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141737222.386, "dur": 3.638, + "args": { + "External id": 501540,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737224.584, "dur": 0.536, + "args": { + "External id": 501541,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141737230.127, "dur": 46.336, + "args": { + "External id": 501542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141737231.063, "dur": 5.653, + "args": { + "External id": 501543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141737234.019, "dur": 1.967, + "args": { + "External id": 501544,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737235.163, "dur": 0.702, + "args": { + "External id": 501545,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141737237.652, "dur": 38.458, + "args": { + "External id": 501546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141737238.321, "dur": 37.056, + "args": { + "External id": 501547,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141737280.128, "dur": 3.676, + "args": { + "External id": 501548,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737282.107, "dur": 0.729, + "args": { + "External id": 501549,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141737292.237, "dur": 1.517, + "args": { + "External id": 501550,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141737301.604, "dur": 8.118, + "args": { + "External id": 501551,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141737303.494, "dur": 5.955, + "args": { + "External id": 501552,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141737392.287, "dur": 176.172, + "args": { + "External id": 501553,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141737393.971, "dur": 3.709, + "args": { + "External id": 501554,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141737399.531, "dur": 168.508, + "args": { + "External id": 501555,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141737403.107, "dur": 0.479, + "args": { + "External id": 501556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141737404.680, "dur": 20.616, + "args": { + "External id": 501557,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141737426.857, "dur": 5.170, + "args": { + "External id": 501558,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737431.067, "dur": 0.723, + "args": { + "External id": 501559,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141737432.822, "dur": 22.105, + "args": { + "External id": 501560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141737434.071, "dur": 1.020, + "args": { + "External id": 501561,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141737436.279, "dur": 18.319, + "args": { + "External id": 501562,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737438.631, "dur": 2.193, + "args": { + "External id": 501563,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141737456.276, "dur": 20.401, + "args": { + "External id": 501564,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737478.365, "dur": 12.222, + "args": { + "External id": 501565,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141737494.849, "dur": 12.299, + "args": { + "External id": 501566,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737508.580, "dur": 11.480, + "args": { + "External id": 501567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141737521.652, "dur": 19.524, + "args": { + "External id": 501568,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737525.337, "dur": 1.223, + "args": { + "External id": 501569,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737528.464, "dur": 0.565, + "args": { + "External id": 501570,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737542.560, "dur": 10.922, + "args": { + "External id": 501571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737554.517, "dur": 12.138, + "args": { + "External id": 501572,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141737577.082, "dur": 1.629, + "args": { + "External id": 501573,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141737604.507, "dur": 4.533, + "args": { + "External id": 501574,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737607.113, "dur": 0.852, + "args": { + "External id": 501575,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141737673.913, "dur": 54.290, + "args": { + "External id": 501576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141737733.419, "dur": 4.580, + "args": { + "External id": 501577,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737736.409, "dur": 0.553, + "args": { + "External id": 501578,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737739.395, "dur": 27.677, + "args": { + "External id": 501579,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141737771.801, "dur": 7.981, + "args": { + "External id": 501580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141737773.754, "dur": 5.298, + "args": { + "External id": 501581,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737777.972, "dur": 0.927, + "args": { + "External id": 501582,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141737782.380, "dur": 40.913, + "args": { + "External id": 501583,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141737783.580, "dur": 39.105, + "args": { + "External id": 501584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141737826.901, "dur": 16.655, + "args": { + "External id": 501585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141737849.181, "dur": 3.622, + "args": { + "External id": 501586,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737851.556, "dur": 0.442, + "args": { + "External id": 501587,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141737859.076, "dur": 44.874, + "args": { + "External id": 501588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141737859.881, "dur": 3.270, + "args": { + "External id": 501589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141737860.694, "dur": 1.902, + "args": { + "External id": 501590,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737861.901, "dur": 0.584, + "args": { + "External id": 501591,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141737863.788, "dur": 39.683, + "args": { + "External id": 501592,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141737864.354, "dur": 38.562, + "args": { + "External id": 501593,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141737907.892, "dur": 3.886, + "args": { + "External id": 501594,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141737910.265, "dur": 0.499, + "args": { + "External id": 501595,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141737919.186, "dur": 1.469, + "args": { + "External id": 501596,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141737928.303, "dur": 5.480, + "args": { + "External id": 501597,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141737930.056, "dur": 3.441, + "args": { + "External id": 501598,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141738011.493, "dur": 245.912, + "args": { + "External id": 501599,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141738015.363, "dur": 2.013, + "args": { + "External id": 501600,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141738018.678, "dur": 238.096, + "args": { + "External id": 501601,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141738022.141, "dur": 0.619, + "args": { + "External id": 501602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141738023.884, "dur": 21.021, + "args": { + "External id": 501603,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141738046.759, "dur": 4.073, + "args": { + "External id": 501604,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738049.925, "dur": 0.724, + "args": { + "External id": 501605,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141738097.195, "dur": 29.502, + "args": { + "External id": 501606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141738099.503, "dur": 2.546, + "args": { + "External id": 501607,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141738103.894, "dur": 22.488, + "args": { + "External id": 501608,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738107.282, "dur": 3.439, + "args": { + "External id": 501609,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141738127.946, "dur": 22.393, + "args": { + "External id": 501610,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738151.709, "dur": 17.136, + "args": { + "External id": 501611,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141738174.591, "dur": 13.548, + "args": { + "External id": 501612,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738189.529, "dur": 13.130, + "args": { + "External id": 501613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141738204.561, "dur": 21.677, + "args": { + "External id": 501614,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738206.636, "dur": 1.532, + "args": { + "External id": 501615,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738210.124, "dur": 0.915, + "args": { + "External id": 501616,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738227.724, "dur": 12.954, + "args": { + "External id": 501617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738244.405, "dur": 11.226, + "args": { + "External id": 501618,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141738265.865, "dur": 2.048, + "args": { + "External id": 501619,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141738277.251, "dur": 3.813, + "args": { + "External id": 501620,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738279.798, "dur": 0.563, + "args": { + "External id": 501621,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141738355.126, "dur": 59.773, + "args": { + "External id": 501622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141738419.895, "dur": 4.656, + "args": { + "External id": 501623,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738422.828, "dur": 0.663, + "args": { + "External id": 501624,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738425.901, "dur": 24.136, + "args": { + "External id": 501625,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141738456.400, "dur": 5.465, + "args": { + "External id": 501626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141738457.790, "dur": 3.403, + "args": { + "External id": 501627,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738459.613, "dur": 1.361, + "args": { + "External id": 501628,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141738464.400, "dur": 42.853, + "args": { + "External id": 501629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141738465.700, "dur": 40.844, + "args": { + "External id": 501630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738510.913, "dur": 14.316, + "args": { + "External id": 501631,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141738531.210, "dur": 6.004, + "args": { + "External id": 501632,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738535.788, "dur": 0.539, + "args": { + "External id": 501633,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141738541.167, "dur": 61.409, + "args": { + "External id": 501634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141738542.097, "dur": 3.198, + "args": { + "External id": 501635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141738542.977, "dur": 1.778, + "args": { + "External id": 501636,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738544.112, "dur": 0.520, + "args": { + "External id": 501637,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141738545.968, "dur": 56.128, + "args": { + "External id": 501638,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141738549.172, "dur": 52.065, + "args": { + "External id": 501639,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141738608.148, "dur": 4.961, + "args": { + "External id": 501640,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738611.011, "dur": 0.661, + "args": { + "External id": 501641,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141738619.365, "dur": 1.613, + "args": { + "External id": 501642,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141738629.326, "dur": 11.678, + "args": { + "External id": 501643,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141738633.455, "dur": 7.211, + "args": { + "External id": 501644,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141738732.211, "dur": 171.753, + "args": { + "External id": 501645,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141738734.289, "dur": 1.819, + "args": { + "External id": 501646,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141738737.495, "dur": 166.046, + "args": { + "External id": 501647,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141738740.700, "dur": 0.415, + "args": { + "External id": 501648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141738742.266, "dur": 20.861, + "args": { + "External id": 501649,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141738764.616, "dur": 3.023, + "args": { + "External id": 501650,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738766.815, "dur": 0.597, + "args": { + "External id": 501651,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141738770.955, "dur": 21.567, + "args": { + "External id": 501652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141738773.838, "dur": 1.571, + "args": { + "External id": 501653,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141738776.466, "dur": 15.822, + "args": { + "External id": 501654,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738778.764, "dur": 2.764, + "args": { + "External id": 501655,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141738793.754, "dur": 19.184, + "args": { + "External id": 501656,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738814.668, "dur": 12.063, + "args": { + "External id": 501657,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141738829.077, "dur": 12.332, + "args": { + "External id": 501658,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738842.835, "dur": 11.594, + "args": { + "External id": 501659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141738856.016, "dur": 21.398, + "args": { + "External id": 501660,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738858.026, "dur": 1.636, + "args": { + "External id": 501661,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738863.956, "dur": 0.508, + "args": { + "External id": 501662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738878.773, "dur": 11.087, + "args": { + "External id": 501663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141738890.839, "dur": 11.626, + "args": { + "External id": 501664,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141738910.323, "dur": 1.606, + "args": { + "External id": 501665,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141738920.150, "dur": 3.030, + "args": { + "External id": 501666,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141738922.161, "dur": 0.322, + "args": { + "External id": 501667,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141738983.149, "dur": 53.224, + "args": { + "External id": 501668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141739041.222, "dur": 8.269, + "args": { + "External id": 501669,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739046.304, "dur": 1.996, + "args": { + "External id": 501670,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739050.919, "dur": 61.534, + "args": { + "External id": 501671,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141739119.259, "dur": 5.687, + "args": { + "External id": 501672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141739120.898, "dur": 3.242, + "args": { + "External id": 501673,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739122.927, "dur": 0.974, + "args": { + "External id": 501674,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141739127.904, "dur": 49.469, + "args": { + "External id": 501675,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141739131.852, "dur": 44.831, + "args": { + "External id": 501676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739181.392, "dur": 18.869, + "args": { + "External id": 501677,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141739204.709, "dur": 23.791, + "args": { + "External id": 501678,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141739207.438, "dur": 20.649, + "args": { + "External id": 501679,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739212.372, "dur": 0.932, + "args": { + "External id": 501680,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141739234.006, "dur": 25.568, + "args": { + "External id": 501681,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141739235.907, "dur": 23.437, + "args": { + "External id": 501682,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739241.343, "dur": 3.490, + "args": { + "External id": 501683,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739245.943, "dur": 12.863, + "args": { + "External id": 501684,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141739274.723, "dur": 5.229, + "args": { + "External id": 501685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141739276.958, "dur": 2.748, + "args": { + "External id": 501686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141739281.002, "dur": 1.358, + "args": { + "External id": 501687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141739281.660, "dur": 0.493, + "args": { + "External id": 501688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739327.102, "dur": 21.931, + "args": { + "External id": 501689,"Sequence number": 5516226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739353.204, "dur": 12.565, + "args": { + "External id": 501690,"Sequence number": 5516227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15870 + } + }, + { + "ph": "s", "id": 9, "pid": 2338711, "tid": 2338711, "ts": 6274141739353.204, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141739372.000, "dur": 6.586, + "args": { + "External id": 501691,"Sequence number": 5516228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739375.852, "dur": 1.133, + "args": { + "External id": 501692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141739381.137, "dur": 5.891, + "args": { + "External id": 501693,"Sequence number": 5516228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739385.160, "dur": 0.556, + "args": { + "External id": 501694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141739388.317, "dur": 4.292, + "args": { + "External id": 501695,"Sequence number": 5516228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739391.614, "dur": 0.403, + "args": { + "External id": 501696,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141739396.719, "dur": 6.023, + "args": { + "External id": 501697,"Sequence number": 5516228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15877 + } + }, + { + "ph": "s", "id": 8, "pid": 2338711, "tid": 2338711, "ts": 6274141739396.719, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739400.509, "dur": 0.898, + "args": { + "External id": 501698,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141739403.729, "dur": 4.617, + "args": { + "External id": 501699,"Sequence number": 5516229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15879 + } + }, + { + "ph": "s", "id": 7, "pid": 2338711, "tid": 2338711, "ts": 6274141739403.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739407.205, "dur": 0.303, + "args": { + "External id": 501700,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274141739409.261, "dur": 14.353, + "args": { + "External id": 501701,"Sequence number": 5516230, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15881 + } + }, + { + "ph": "s", "id": 6, "pid": 2338711, "tid": 2338711, "ts": 6274141739409.261, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739419.982, "dur": 2.605, + "args": { + "External id": 501702,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141739424.575, "dur": 5.659, + "args": { + "External id": 501703,"Sequence number": 5516231, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15883 + } + }, + { + "ph": "s", "id": 5, "pid": 2338711, "tid": 2338711, "ts": 6274141739424.575, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739427.040, "dur": 2.162, + "args": { + "External id": 501704,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141739434.654, "dur": 28.793, + "args": { + "External id": 501705,"Sequence number": 5516232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141739436.261, "dur": 26.988, + "args": { + "External id": 501706,"Sequence number": 5516232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141739438.841, "dur": 6.322, + "args": { + "External id": 501707,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141739441.409, "dur": 3.219, + "args": { + "External id": 501708,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739445.936, "dur": 16.831, + "args": { + "External id": 501709,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141739491.822, "dur": 4.096, + "args": { + "External id": 501710,"Sequence number": 5516232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15890 + } + }, + { + "ph": "s", "id": 4, "pid": 2338711, "tid": 2338711, "ts": 6274141739491.822, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141739499.205, "dur": 1.256, + "args": { + "External id": 501711,"Sequence number": 5516233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274141739532.403, "dur": 49493.102, + "args": { + "External id": 501712,"Sequence number": 5516233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15892 + } + }, + { + "ph": "s", "id": 3, "pid": 2338711, "tid": 2338711, "ts": 6274141739532.403, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274141739546.995, "dur": 27.007, + "args": { + "External id": 501713,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274141739547.872, "dur": 25.941, + "args": { + "External id": 501714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141739549.328, "dur": 7.735, + "args": { + "External id": 501715,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141739552.518, "dur": 4.078, + "args": { + "External id": 501716,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739557.821, "dur": 15.566, + "args": { + "External id": 501717,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141739607.763, "dur": 27.218, + "args": { + "External id": 501718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141739608.967, "dur": 7.611, + "args": { + "External id": 501719,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739611.292, "dur": 4.762, + "args": { + "External id": 501720,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739618.177, "dur": 16.570, + "args": { + "External id": 501721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739619.829, "dur": 14.558, + "args": { + "External id": 501722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141739643.134, "dur": 20.681, + "args": { + "External id": 501723,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141739643.977, "dur": 4.804, + "args": { + "External id": 501724,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739645.471, "dur": 3.052, + "args": { + "External id": 501725,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739651.609, "dur": 12.014, + "args": { + "External id": 501726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739652.274, "dur": 10.978, + "args": { + "External id": 501727,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274141739670.763, "dur": 19.353, + "args": { + "External id": 501728,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141739672.803, "dur": 6.466, + "args": { + "External id": 501729,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739680.019, "dur": 9.834, + "args": { + "External id": 501730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739680.663, "dur": 8.873, + "args": { + "External id": 501731,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274141739694.885, "dur": 23.065, + "args": { + "External id": 501732,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141739720.865, "dur": 54.445, + "args": { + "External id": 501733,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141739725.212, "dur": 49.727, + "args": { + "External id": 501734,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739729.410, "dur": 1.082, + "args": { + "External id": 501735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141739732.083, "dur": 25.298, + "args": { + "External id": 501736,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141739733.825, "dur": 23.352, + "args": { + "External id": 501737,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141739738.014, "dur": 2.911, + "args": { + "External id": 501738,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141739741.510, "dur": 15.313, + "args": { + "External id": 501739,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274141739780.070, "dur": 43383.800, + "args": { + "External id": 501740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274141739781.614, "dur": 43380.880, + "args": { + "External id": 501741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141783177.488, "dur": 7.923, + "args": { + "External id": 501742,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783182.138, "dur": 1.474, + "args": { + "External id": 501743,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141783193.997, "dur": 103.464, + "args": { + "External id": 501744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141783195.682, "dur": 6.100, + "args": { + "External id": 501745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141783198.271, "dur": 2.688, + "args": { + "External id": 501746,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783200.190, "dur": 0.514, + "args": { + "External id": 501747,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141783203.131, "dur": 93.738, + "args": { + "External id": 501748,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141783204.758, "dur": 91.254, + "args": { + "External id": 501749,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141783301.247, "dur": 5.407, + "args": { + "External id": 501750,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783304.214, "dur": 0.741, + "args": { + "External id": 501751,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141783316.815, "dur": 2.364, + "args": { + "External id": 501752,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141783328.838, "dur": 6.591, + "args": { + "External id": 501753,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141783331.119, "dur": 4.010, + "args": { + "External id": 501754,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141783469.008, "dur": 264.412, + "args": { + "External id": 501755,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141783472.511, "dur": 4.102, + "args": { + "External id": 501756,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141783480.665, "dur": 252.194, + "args": { + "External id": 501757,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141783484.227, "dur": 0.467, + "args": { + "External id": 501758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141783486.177, "dur": 28.142, + "args": { + "External id": 501759,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141783516.194, "dur": 29.669, + "args": { + "External id": 501760,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783542.784, "dur": 2.425, + "args": { + "External id": 501761,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141783546.895, "dur": 25.119, + "args": { + "External id": 501762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141783548.098, "dur": 1.110, + "args": { + "External id": 501763,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141783550.321, "dur": 21.419, + "args": { + "External id": 501764,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141783554.278, "dur": 3.210, + "args": { + "External id": 501765,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141783575.998, "dur": 41.395, + "args": { + "External id": 501766,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141783620.225, "dur": 15.889, + "args": { + "External id": 501767,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141783639.795, "dur": 16.984, + "args": { + "External id": 501768,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141783658.413, "dur": 14.907, + "args": { + "External id": 501769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141783675.275, "dur": 24.371, + "args": { + "External id": 501770,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141783677.923, "dur": 1.578, + "args": { + "External id": 501771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783681.496, "dur": 2.111, + "args": { + "External id": 501772,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141783703.606, "dur": 13.636, + "args": { + "External id": 501773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141783718.684, "dur": 13.040, + "args": { + "External id": 501774,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141783742.307, "dur": 2.730, + "args": { + "External id": 501775,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141783753.689, "dur": 3.731, + "args": { + "External id": 501776,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783755.989, "dur": 0.440, + "args": { + "External id": 501777,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141783837.410, "dur": 72.523, + "args": { + "External id": 501778,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141783915.544, "dur": 7.212, + "args": { + "External id": 501779,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783918.997, "dur": 0.866, + "args": { + "External id": 501780,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141783926.479, "dur": 29.178, + "args": { + "External id": 501781,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141783961.513, "dur": 7.225, + "args": { + "External id": 501782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141783963.553, "dur": 4.498, + "args": { + "External id": 501783,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141783965.941, "dur": 1.819, + "args": { + "External id": 501784,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141783971.972, "dur": 45.393, + "args": { + "External id": 501785,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141783973.338, "dur": 43.430, + "args": { + "External id": 501786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784021.888, "dur": 16.860, + "args": { + "External id": 501787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141784047.315, "dur": 3.741, + "args": { + "External id": 501788,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784049.765, "dur": 0.393, + "args": { + "External id": 501789,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141784092.782, "dur": 58.838, + "args": { + "External id": 501790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141784093.909, "dur": 6.635, + "args": { + "External id": 501791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141784094.869, "dur": 4.895, + "args": { + "External id": 501792,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784096.599, "dur": 2.857, + "args": { + "External id": 501793,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141784103.463, "dur": 47.604, + "args": { + "External id": 501794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141784104.050, "dur": 46.383, + "args": { + "External id": 501795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141784156.887, "dur": 4.690, + "args": { + "External id": 501796,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784159.493, "dur": 0.549, + "args": { + "External id": 501797,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141784168.805, "dur": 1.861, + "args": { + "External id": 501798,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141784179.400, "dur": 9.465, + "args": { + "External id": 501799,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141784183.880, "dur": 4.733, + "args": { + "External id": 501800,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141784287.699, "dur": 198.379, + "args": { + "External id": 501801,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141784291.300, "dur": 2.092, + "args": { + "External id": 501802,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141784295.193, "dur": 190.350, + "args": { + "External id": 501803,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141784296.459, "dur": 0.386, + "args": { + "External id": 501804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141784298.455, "dur": 22.908, + "args": { + "External id": 501805,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141784322.943, "dur": 4.998, + "args": { + "External id": 501806,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784326.978, "dur": 0.742, + "args": { + "External id": 501807,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141784331.525, "dur": 23.284, + "args": { + "External id": 501808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141784332.921, "dur": 1.169, + "args": { + "External id": 501809,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141784335.196, "dur": 19.356, + "args": { + "External id": 501810,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784338.319, "dur": 2.572, + "args": { + "External id": 501811,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141784356.115, "dur": 23.970, + "args": { + "External id": 501812,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784381.811, "dur": 15.886, + "args": { + "External id": 501813,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141784400.532, "dur": 13.423, + "args": { + "External id": 501814,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784415.547, "dur": 13.641, + "args": { + "External id": 501815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141784431.098, "dur": 25.657, + "args": { + "External id": 501816,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784434.945, "dur": 2.120, + "args": { + "External id": 501817,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784441.292, "dur": 0.731, + "args": { + "External id": 501818,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784458.147, "dur": 13.308, + "args": { + "External id": 501819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784472.795, "dur": 11.365, + "args": { + "External id": 501820,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141784493.515, "dur": 1.833, + "args": { + "External id": 501821,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141784504.025, "dur": 3.596, + "args": { + "External id": 501822,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784506.332, "dur": 0.517, + "args": { + "External id": 501823,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141784576.097, "dur": 74.155, + "args": { + "External id": 501824,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141784657.079, "dur": 8.303, + "args": { + "External id": 501825,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784662.850, "dur": 1.107, + "args": { + "External id": 501826,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784666.779, "dur": 27.885, + "args": { + "External id": 501827,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141784699.936, "dur": 5.736, + "args": { + "External id": 501828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141784701.482, "dur": 3.519, + "args": { + "External id": 501829,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784703.690, "dur": 1.065, + "args": { + "External id": 501830,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141784718.014, "dur": 45.541, + "args": { + "External id": 501831,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141784721.042, "dur": 41.819, + "args": { + "External id": 501832,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141784767.544, "dur": 16.565, + "args": { + "External id": 501833,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141784789.815, "dur": 21.504, + "args": { + "External id": 501834,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784809.526, "dur": 0.751, + "args": { + "External id": 501835,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141784816.035, "dur": 48.136, + "args": { + "External id": 501836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141784817.298, "dur": 5.787, + "args": { + "External id": 501837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141784818.113, "dur": 4.332, + "args": { + "External id": 501838,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784821.716, "dur": 0.618, + "args": { + "External id": 501839,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141784824.068, "dur": 39.636, + "args": { + "External id": 501840,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141784824.587, "dur": 38.490, + "args": { + "External id": 501841,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141784868.181, "dur": 3.847, + "args": { + "External id": 501842,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141784870.438, "dur": 0.520, + "args": { + "External id": 501843,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141784878.709, "dur": 1.744, + "args": { + "External id": 501844,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141784887.998, "dur": 11.687, + "args": { + "External id": 501845,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141784892.173, "dur": 7.214, + "args": { + "External id": 501846,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141784988.875, "dur": 256.512, + "args": { + "External id": 501847,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141784991.206, "dur": 1.726, + "args": { + "External id": 501848,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141784994.325, "dur": 250.553, + "args": { + "External id": 501849,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141784996.123, "dur": 0.441, + "args": { + "External id": 501850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141784997.954, "dur": 25.747, + "args": { + "External id": 501851,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141785025.209, "dur": 3.257, + "args": { + "External id": 501852,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785027.628, "dur": 0.638, + "args": { + "External id": 501853,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141785031.927, "dur": 73.053, + "args": { + "External id": 501854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141785032.973, "dur": 1.077, + "args": { + "External id": 501855,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141785035.268, "dur": 68.888, + "args": { + "External id": 501856,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785040.213, "dur": 2.756, + "args": { + "External id": 501857,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141785107.135, "dur": 25.569, + "args": { + "External id": 501858,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785134.496, "dur": 14.902, + "args": { + "External id": 501859,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141785152.574, "dur": 16.400, + "args": { + "External id": 501860,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785170.663, "dur": 14.251, + "args": { + "External id": 501861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141785186.827, "dur": 25.643, + "args": { + "External id": 501862,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785191.135, "dur": 1.661, + "args": { + "External id": 501863,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785195.521, "dur": 0.567, + "args": { + "External id": 501864,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785213.834, "dur": 15.767, + "args": { + "External id": 501865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785230.668, "dur": 13.134, + "args": { + "External id": 501866,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141785253.802, "dur": 2.565, + "args": { + "External id": 501867,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141785266.368, "dur": 4.212, + "args": { + "External id": 501868,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785269.236, "dur": 0.529, + "args": { + "External id": 501869,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141785343.322, "dur": 83.198, + "args": { + "External id": 501870,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141785431.619, "dur": 4.726, + "args": { + "External id": 501871,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785434.359, "dur": 0.910, + "args": { + "External id": 501872,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785437.842, "dur": 31.452, + "args": { + "External id": 501873,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141785474.080, "dur": 7.659, + "args": { + "External id": 501874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141785475.904, "dur": 5.111, + "args": { + "External id": 501875,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785480.084, "dur": 0.761, + "args": { + "External id": 501876,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141785484.813, "dur": 46.641, + "args": { + "External id": 501877,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141785486.030, "dur": 44.827, + "args": { + "External id": 501878,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785535.084, "dur": 17.576, + "args": { + "External id": 501879,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141785558.903, "dur": 6.019, + "args": { + "External id": 501880,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785563.516, "dur": 0.528, + "args": { + "External id": 501881,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141785569.302, "dur": 69.042, + "args": { + "External id": 501882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141785572.724, "dur": 5.537, + "args": { + "External id": 501883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141785573.886, "dur": 3.727, + "args": { + "External id": 501884,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785575.191, "dur": 2.198, + "args": { + "External id": 501885,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141785579.071, "dur": 58.713, + "args": { + "External id": 501886,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141785579.500, "dur": 57.464, + "args": { + "External id": 501887,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141785644.419, "dur": 4.939, + "args": { + "External id": 501888,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785647.142, "dur": 0.880, + "args": { + "External id": 501889,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141785657.900, "dur": 1.726, + "args": { + "External id": 501890,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141785667.702, "dur": 8.778, + "args": { + "External id": 501891,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141785670.310, "dur": 5.891, + "args": { + "External id": 501892,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141785762.078, "dur": 173.471, + "args": { + "External id": 501893,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141785764.198, "dur": 1.760, + "args": { + "External id": 501894,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141785769.251, "dur": 165.716, + "args": { + "External id": 501895,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141785771.005, "dur": 0.507, + "args": { + "External id": 501896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141785775.392, "dur": 20.806, + "args": { + "External id": 501897,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141785797.524, "dur": 3.499, + "args": { + "External id": 501898,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785799.985, "dur": 0.814, + "args": { + "External id": 501899,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141785801.838, "dur": 20.967, + "args": { + "External id": 501900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141785802.832, "dur": 2.563, + "args": { + "External id": 501901,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141785806.573, "dur": 15.982, + "args": { + "External id": 501902,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785809.346, "dur": 2.253, + "args": { + "External id": 501903,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141785824.018, "dur": 19.699, + "args": { + "External id": 501904,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785845.261, "dur": 12.747, + "args": { + "External id": 501905,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141785862.596, "dur": 12.002, + "args": { + "External id": 501906,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785876.212, "dur": 11.910, + "args": { + "External id": 501907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141785889.800, "dur": 18.425, + "args": { + "External id": 501908,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785892.041, "dur": 1.612, + "args": { + "External id": 501909,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785895.768, "dur": 0.436, + "args": { + "External id": 501910,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785909.579, "dur": 11.175, + "args": { + "External id": 501911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141785924.133, "dur": 9.547, + "args": { + "External id": 501912,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141785942.100, "dur": 1.921, + "args": { + "External id": 501913,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141785951.960, "dur": 3.642, + "args": { + "External id": 501914,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141785954.144, "dur": 0.588, + "args": { + "External id": 501915,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141786016.889, "dur": 98.026, + "args": { + "External id": 501916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141786122.353, "dur": 6.489, + "args": { + "External id": 501917,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786126.397, "dur": 0.851, + "args": { + "External id": 501918,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786130.302, "dur": 26.824, + "args": { + "External id": 501919,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141786162.359, "dur": 7.768, + "args": { + "External id": 501920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141786166.229, "dur": 3.186, + "args": { + "External id": 501921,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786168.232, "dur": 0.958, + "args": { + "External id": 501922,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141786173.058, "dur": 46.464, + "args": { + "External id": 501923,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141786174.578, "dur": 44.351, + "args": { + "External id": 501924,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786223.342, "dur": 16.161, + "args": { + "External id": 501925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141786245.427, "dur": 5.921, + "args": { + "External id": 501926,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786250.022, "dur": 0.494, + "args": { + "External id": 501927,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141786255.728, "dur": 48.286, + "args": { + "External id": 501928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141786256.736, "dur": 5.879, + "args": { + "External id": 501929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141786257.903, "dur": 4.118, + "args": { + "External id": 501930,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786261.176, "dur": 0.713, + "args": { + "External id": 501931,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141786263.183, "dur": 40.400, + "args": { + "External id": 501932,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141786264.364, "dur": 38.555, + "args": { + "External id": 501933,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141786308.247, "dur": 3.714, + "args": { + "External id": 501934,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786310.456, "dur": 0.491, + "args": { + "External id": 501935,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141786320.456, "dur": 1.723, + "args": { + "External id": 501936,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141786330.336, "dur": 6.082, + "args": { + "External id": 501937,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141786332.372, "dur": 3.764, + "args": { + "External id": 501938,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141786422.478, "dur": 213.060, + "args": { + "External id": 501939,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141786424.635, "dur": 1.822, + "args": { + "External id": 501940,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141786430.556, "dur": 204.525, + "args": { + "External id": 501941,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141786433.556, "dur": 0.328, + "args": { + "External id": 501942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141786435.144, "dur": 23.096, + "args": { + "External id": 501943,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141786460.088, "dur": 3.491, + "args": { + "External id": 501944,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786462.511, "dur": 0.870, + "args": { + "External id": 501945,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141786464.387, "dur": 24.551, + "args": { + "External id": 501946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141786467.507, "dur": 1.052, + "args": { + "External id": 501947,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141786469.557, "dur": 19.146, + "args": { + "External id": 501948,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786472.674, "dur": 2.897, + "args": { + "External id": 501949,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141786492.656, "dur": 21.517, + "args": { + "External id": 501950,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786515.785, "dur": 13.889, + "args": { + "External id": 501951,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141786532.578, "dur": 14.978, + "args": { + "External id": 501952,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786548.816, "dur": 13.443, + "args": { + "External id": 501953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141786563.838, "dur": 36.794, + "args": { + "External id": 501954,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786565.924, "dur": 1.308, + "args": { + "External id": 501955,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786569.265, "dur": 0.718, + "args": { + "External id": 501956,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786605.525, "dur": 14.725, + "args": { + "External id": 501957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786621.238, "dur": 12.593, + "args": { + "External id": 501958,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141786643.533, "dur": 2.733, + "args": { + "External id": 501959,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141786656.026, "dur": 3.997, + "args": { + "External id": 501960,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786658.329, "dur": 0.754, + "args": { + "External id": 501961,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141786728.071, "dur": 59.788, + "args": { + "External id": 501962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141786793.308, "dur": 6.962, + "args": { + "External id": 501963,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786796.395, "dur": 2.632, + "args": { + "External id": 501964,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786804.015, "dur": 26.677, + "args": { + "External id": 501965,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141786835.195, "dur": 5.144, + "args": { + "External id": 501966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141786836.821, "dur": 2.738, + "args": { + "External id": 501967,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786838.521, "dur": 0.867, + "args": { + "External id": 501968,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141786842.993, "dur": 41.818, + "args": { + "External id": 501969,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141786843.995, "dur": 40.108, + "args": { + "External id": 501970,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141786888.821, "dur": 16.034, + "args": { + "External id": 501971,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141786912.940, "dur": 3.708, + "args": { + "External id": 501972,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786915.391, "dur": 0.323, + "args": { + "External id": 501973,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141786920.528, "dur": 48.124, + "args": { + "External id": 501974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141786921.561, "dur": 3.334, + "args": { + "External id": 501975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141786922.337, "dur": 1.944, + "args": { + "External id": 501976,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786923.606, "dur": 0.566, + "args": { + "External id": 501977,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141786927.899, "dur": 40.383, + "args": { + "External id": 501978,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141786928.874, "dur": 38.598, + "args": { + "External id": 501979,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141786972.255, "dur": 3.640, + "args": { + "External id": 501980,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141786974.357, "dur": 0.478, + "args": { + "External id": 501981,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141786981.048, "dur": 1.414, + "args": { + "External id": 501982,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141786990.022, "dur": 10.733, + "args": { + "External id": 501983,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141786994.317, "dur": 6.109, + "args": { + "External id": 501984,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141787139.431, "dur": 206.239, + "args": { + "External id": 501985,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141787141.983, "dur": 5.063, + "args": { + "External id": 501986,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141787148.930, "dur": 196.261, + "args": { + "External id": 501987,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141787150.258, "dur": 0.559, + "args": { + "External id": 501988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141787152.112, "dur": 26.365, + "args": { + "External id": 501989,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141787180.202, "dur": 5.275, + "args": { + "External id": 501990,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787184.238, "dur": 1.016, + "args": { + "External id": 501991,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141787188.755, "dur": 22.382, + "args": { + "External id": 501992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141787190.042, "dur": 1.268, + "args": { + "External id": 501993,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141787192.324, "dur": 18.515, + "args": { + "External id": 501994,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787195.296, "dur": 2.809, + "args": { + "External id": 501995,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141787212.372, "dur": 21.787, + "args": { + "External id": 501996,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787235.806, "dur": 15.683, + "args": { + "External id": 501997,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141787253.913, "dur": 13.624, + "args": { + "External id": 501998,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787268.864, "dur": 14.849, + "args": { + "External id": 501999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141787285.644, "dur": 25.156, + "args": { + "External id": 502000,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787287.729, "dur": 1.535, + "args": { + "External id": 502001,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787293.643, "dur": 2.008, + "args": { + "External id": 502002,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787312.559, "dur": 13.466, + "args": { + "External id": 502003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787327.064, "dur": 17.210, + "args": { + "External id": 502004,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141787352.968, "dur": 2.306, + "args": { + "External id": 502005,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141787364.142, "dur": 3.943, + "args": { + "External id": 502006,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787366.692, "dur": 0.460, + "args": { + "External id": 502007,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141787436.497, "dur": 57.582, + "args": { + "External id": 502008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141787498.753, "dur": 6.826, + "args": { + "External id": 502009,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787503.772, "dur": 0.723, + "args": { + "External id": 502010,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787507.405, "dur": 24.142, + "args": { + "External id": 502011,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141787535.839, "dur": 5.175, + "args": { + "External id": 502012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141787537.395, "dur": 2.936, + "args": { + "External id": 502013,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787539.352, "dur": 0.851, + "args": { + "External id": 502014,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141787543.334, "dur": 61.561, + "args": { + "External id": 502015,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141787546.735, "dur": 57.248, + "args": { + "External id": 502016,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787610.984, "dur": 17.685, + "args": { + "External id": 502017,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141787635.184, "dur": 4.707, + "args": { + "External id": 502018,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787638.035, "dur": 0.749, + "args": { + "External id": 502019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141787644.004, "dur": 50.673, + "args": { + "External id": 502020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141787645.388, "dur": 6.148, + "args": { + "External id": 502021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141787646.487, "dur": 4.494, + "args": { + "External id": 502022,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787650.351, "dur": 0.513, + "args": { + "External id": 502023,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141787652.141, "dur": 42.071, + "args": { + "External id": 502024,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141787653.078, "dur": 40.630, + "args": { + "External id": 502025,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141787698.807, "dur": 5.876, + "args": { + "External id": 502026,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787700.953, "dur": 2.621, + "args": { + "External id": 502027,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141787711.307, "dur": 1.555, + "args": { + "External id": 502028,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141787720.907, "dur": 8.056, + "args": { + "External id": 502029,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141787724.758, "dur": 3.949, + "args": { + "External id": 502030,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141787809.548, "dur": 188.190, + "args": { + "External id": 502031,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141787813.905, "dur": 1.706, + "args": { + "External id": 502032,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141787816.877, "dur": 180.400, + "args": { + "External id": 502033,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141787818.242, "dur": 0.553, + "args": { + "External id": 502034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141787820.307, "dur": 21.382, + "args": { + "External id": 502035,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141787843.048, "dur": 5.289, + "args": { + "External id": 502036,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787847.214, "dur": 0.737, + "args": { + "External id": 502037,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141787851.127, "dur": 22.674, + "args": { + "External id": 502038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141787852.421, "dur": 1.034, + "args": { + "External id": 502039,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141787854.276, "dur": 19.295, + "args": { + "External id": 502040,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787857.518, "dur": 2.904, + "args": { + "External id": 502041,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141787875.224, "dur": 22.145, + "args": { + "External id": 502042,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787899.069, "dur": 16.635, + "args": { + "External id": 502043,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141787918.640, "dur": 14.514, + "args": { + "External id": 502044,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787935.015, "dur": 13.253, + "args": { + "External id": 502045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141787949.974, "dur": 20.962, + "args": { + "External id": 502046,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787954.150, "dur": 1.365, + "args": { + "External id": 502047,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141787957.519, "dur": 0.604, + "args": { + "External id": 502048,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787972.080, "dur": 11.410, + "args": { + "External id": 502049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141787984.534, "dur": 11.808, + "args": { + "External id": 502050,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141788003.443, "dur": 1.747, + "args": { + "External id": 502051,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141788014.089, "dur": 3.678, + "args": { + "External id": 502052,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788016.681, "dur": 0.322, + "args": { + "External id": 502053,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141788116.014, "dur": 55.824, + "args": { + "External id": 502054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141788177.519, "dur": 5.568, + "args": { + "External id": 502055,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788180.791, "dur": 0.805, + "args": { + "External id": 502056,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788184.514, "dur": 25.394, + "args": { + "External id": 502057,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141788214.660, "dur": 7.927, + "args": { + "External id": 502058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141788216.156, "dur": 5.801, + "args": { + "External id": 502059,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788220.678, "dur": 1.062, + "args": { + "External id": 502060,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141788225.294, "dur": 42.069, + "args": { + "External id": 502061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141788226.324, "dur": 40.298, + "args": { + "External id": 502062,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788271.381, "dur": 14.256, + "args": { + "External id": 502063,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141788291.819, "dur": 3.907, + "args": { + "External id": 502064,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788294.076, "dur": 0.700, + "args": { + "External id": 502065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274141788299.681, "dur": 45.443, + "args": { + "External id": 502066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141788302.970, "dur": 3.721, + "args": { + "External id": 502067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141788304.043, "dur": 2.069, + "args": { + "External id": 502068,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788305.520, "dur": 0.487, + "args": { + "External id": 502069,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141788307.196, "dur": 37.539, + "args": { + "External id": 502070,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141788307.920, "dur": 36.219, + "args": { + "External id": 502071,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141788348.999, "dur": 3.808, + "args": { + "External id": 502072,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788351.429, "dur": 0.358, + "args": { + "External id": 502073,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141788360.250, "dur": 1.461, + "args": { + "External id": 502074,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141788369.280, "dur": 8.192, + "args": { + "External id": 502075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141788371.291, "dur": 5.897, + "args": { + "External id": 502076,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141788456.707, "dur": 202.664, + "args": { + "External id": 502077,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141788458.980, "dur": 3.874, + "args": { + "External id": 502078,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274141788466.365, "dur": 192.517, + "args": { + "External id": 502079,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274141788467.798, "dur": 0.322, + "args": { + "External id": 502080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274141788469.174, "dur": 21.778, + "args": { + "External id": 502081,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274141788492.281, "dur": 3.623, + "args": { + "External id": 502082,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788494.776, "dur": 0.918, + "args": { + "External id": 502083,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141788496.736, "dur": 21.137, + "args": { + "External id": 502084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274141788497.425, "dur": 1.290, + "args": { + "External id": 502085,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274141788499.709, "dur": 17.888, + "args": { + "External id": 502086,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788504.202, "dur": 2.407, + "args": { + "External id": 502087,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274141788519.136, "dur": 18.134, + "args": { + "External id": 502088,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788538.809, "dur": 12.796, + "args": { + "External id": 502089,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274141788556.198, "dur": 12.950, + "args": { + "External id": 502090,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788570.695, "dur": 29.123, + "args": { + "External id": 502091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141788602.623, "dur": 24.925, + "args": { + "External id": 502092,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788604.815, "dur": 2.005, + "args": { + "External id": 502093,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788608.969, "dur": 0.747, + "args": { + "External id": 502094,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788629.115, "dur": 13.455, + "args": { + "External id": 502095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788646.015, "dur": 11.426, + "args": { + "External id": 502096,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274141788667.222, "dur": 1.936, + "args": { + "External id": 502097,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141788678.614, "dur": 3.662, + "args": { + "External id": 502098,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788680.965, "dur": 0.555, + "args": { + "External id": 502099,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141788746.747, "dur": 54.236, + "args": { + "External id": 502100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274141788805.408, "dur": 4.815, + "args": { + "External id": 502101,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788808.165, "dur": 0.965, + "args": { + "External id": 502102,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788811.757, "dur": 24.761, + "args": { + "External id": 502103,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274141788840.897, "dur": 7.554, + "args": { + "External id": 502104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274141788844.825, "dur": 2.834, + "args": { + "External id": 502105,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788846.748, "dur": 0.749, + "args": { + "External id": 502106,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274141788850.825, "dur": 44.169, + "args": { + "External id": 502107,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274141788851.745, "dur": 42.545, + "args": { + "External id": 502108,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788898.696, "dur": 16.381, + "args": { + "External id": 502109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141788919.248, "dur": 25.930, + "args": { + "External id": 502110,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 16290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274141788921.504, "dur": 23.300, + "args": { + "External id": 502111,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788928.084, "dur": 0.468, + "args": { + "External id": 502112,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 16292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274141788950.511, "dur": 30.218, + "args": { + "External id": 502113,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 16293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274141788954.662, "dur": 25.844, + "args": { + "External id": 502114,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 16294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141788959.547, "dur": 3.555, + "args": { + "External id": 502115,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274141788964.217, "dur": 15.855, + "args": { + "External id": 502116,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141788992.981, "dur": 4.855, + "args": { + "External id": 502117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141788994.724, "dur": 2.824, + "args": { + "External id": 502118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141788999.125, "dur": 3.573, + "args": { + "External id": 502119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274141789002.024, "dur": 0.590, + "args": { + "External id": 502120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141789043.761, "dur": 63.300, + "args": { + "External id": 502121,"Sequence number": 5516234, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274141789110.441, "dur": 17.703, + "args": { + "External id": 502122,"Sequence number": 5516235, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16302 + } + }, + { + "ph": "s", "id": 2, "pid": 2338711, "tid": 2338711, "ts": 6274141789110.441, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274141789240.549, "dur": 41.790, + "args": { + "External id": 502123,"Record function id": 0, "Ev Idx": 16303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2338711, + "ts": 6274141789404.031, "dur": 37.379, + "args": { + "External id": 502124,"Sequence number": 5516236, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16304 + } + }, + { + "ph": "s", "id": 1, "pid": 2338711, "tid": 2338711, "ts": 6274141789404.031, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141789474.393, "dur": 36.919, + "args": { + "External id": 502125,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274141789482.256, "dur": 10.713, + "args": { + "External id": 502126,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274141789486.529, "dur": 5.814, + "args": { + "External id": 502127,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274141789494.489, "dur": 16.395, + "args": { + "External id": 502128,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2338711, + "ts": 6274143896023.570, "dur": 118.634, + "args": { + "External id": 502129,"Sequence number": 5516237, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 16309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2338711, + "ts": 6274143896155.706, "dur": 25.345, + "args": { + "External id": 502130,"Sequence number": 5516238, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16310 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338711, "tid": 2338711, + "ts": 6274143896218.886, "dur": 2363.616, + "args": { + "External id": 502131,"Record function id": 0, "Ev Idx": 16311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143897580.916, "dur": 32.731, + "args": { + "External id": 502132,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338711, "tid": 2338711, + "ts": 6274143897631.691, "dur": 7.455, + "args": { + "External id": 502133,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143898088.796, "dur": 5.157, + "args": { + "External id": 502134,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338711, "tid": 2338711, + "ts": 6274143898100.286, "dur": 2.721, + "args": { + "External id": 502135,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143898463.583, "dur": 1.671, + "args": { + "External id": 502136,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338711, "tid": 2338711, + "ts": 6274143898471.697, "dur": 2.205, + "args": { + "External id": 502137,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274143899114.280, "dur": 14.609, + "args": { + "External id": 502138,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143899122.177, "dur": 2.765, + "args": { + "External id": 502139,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274143899130.619, "dur": 4.430, + "args": { + "External id": 502140,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143899132.878, "dur": 1.049, + "args": { + "External id": 502141,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143899162.668, "dur": 15501.891, + "args": { + "External id": 502142,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 16322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143899169.282, "dur": 15494.504, + "args": { + "External id": 502143,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143899174.893, "dur": 10.153, + "args": { + "External id": 502144,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143899186.951, "dur": 15475.198, + "args": { + "External id": 502145,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143899197.124, "dur": 0.805, + "args": { + "External id": 502146,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338711, "tid": 2338711, + "ts": 6274143899201.001, "dur": 11.024, + "args": { + "External id": 502147,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 16327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338711, "tid": 2338711, + "ts": 6274143899204.573, "dur": 7.273, + "args": { + "External id": 502148,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 16328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143899210.958, "dur": 0.632, + "args": { + "External id": 502149,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274143899214.132, "dur": 136.621, + "args": { + "External id": 502150,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274143899215.831, "dur": 134.670, + "args": { + "External id": 502151,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274143899217.842, "dur": 7.694, + "args": { + "External id": 502152,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 16332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143899220.194, "dur": 4.882, + "args": { + "External id": 502153,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143899226.316, "dur": 123.804, + "args": { + "External id": 502154,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143899352.344, "dur": 15304.622, + "args": { + "External id": 502155,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143914685.353, "dur": 351.923, + "args": { + "External id": 502156,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 16336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143914687.387, "dur": 349.444, + "args": { + "External id": 502157,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 16337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143914693.593, "dur": 9.693, + "args": { + "External id": 502158,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143914704.904, "dur": 330.819, + "args": { + "External id": 502159,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 16339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338711, "tid": 2338711, + "ts": 6274143915088.506, "dur": 53.451, + "args": { + "External id": 502160,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143915093.200, "dur": 7.092, + "args": { + "External id": 502161,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338711, "tid": 2338711, + "ts": 6274143915103.690, "dur": 37.767, + "args": { + "External id": 502162,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 16342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274143915107.572, "dur": 7.084, + "args": { + "External id": 502163,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338711, "tid": 2338711, + "ts": 6274143915157.063, "dur": 65.378, + "args": { + "External id": 502164,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338711, "tid": 2338711, + "ts": 6274143915161.187, "dur": 4.983, + "args": { + "External id": 502165,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 16345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915163.910, "dur": 1.964, + "args": { + "External id": 502166,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143915167.559, "dur": 2.815, + "args": { + "External id": 502167,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274143915172.019, "dur": 1.884, + "args": { + "External id": 502168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 16348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338711, "tid": 2338711, + "ts": 6274143915178.744, "dur": 6.212, + "args": { + "External id": 502169,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915184.106, "dur": 0.684, + "args": { + "External id": 502170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338711, "tid": 2338711, + "ts": 6274143915186.216, "dur": 2.785, + "args": { + "External id": 502171,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915188.072, "dur": 0.846, + "args": { + "External id": 502172,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338711, "tid": 2338711, + "ts": 6274143915190.941, "dur": 3.642, + "args": { + "External id": 502173,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 16353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338711, "tid": 2338711, + "ts": 6274143915192.136, "dur": 2.321, + "args": { + "External id": 502174,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 16354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915193.460, "dur": 0.908, + "args": { + "External id": 502175,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 16355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143915195.397, "dur": 26.349, + "args": { + "External id": 502176,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 16356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143915230.333, "dur": 28.859, + "args": { + "External id": 502177,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143915231.931, "dur": 27.049, + "args": { + "External id": 502178,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915237.132, "dur": 3.087, + "args": { + "External id": 502179,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143915241.126, "dur": 17.279, + "args": { + "External id": 502180,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16360 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274143915380.341, "dur": 150.805, + "args": { + "External id": 502181,"Record function id": 0, "Ev Idx": 16361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338711, "tid": 2338711, + "ts": 6274143915463.962, "dur": 56.139, + "args": { + "External id": 502182,"Record function id": 0, "Ev Idx": 16362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274143915538.225, "dur": 44.359, + "args": { + "External id": 502183,"Record function id": 0, "Ev Idx": 16363 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274143915608.630, "dur": 14124.759, + "args": { + "External id": 502184,"Record function id": 0, "Ev Idx": 16364 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338711, "tid": 2338711, + "ts": 6274143915618.654, "dur": 1551.055, + "args": { + "External id": 502185,"Record function id": 0, "Ev Idx": 16365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143915726.837, "dur": 11.871, + "args": { + "External id": 502186,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143915755.953, "dur": 120.640, + "args": { + "External id": 502187,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915761.505, "dur": 2.016, + "args": { + "External id": 502188,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915767.852, "dur": 0.231, + "args": { + "External id": 502189,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915769.262, "dur": 0.339, + "args": { + "External id": 502190,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915770.795, "dur": 2.283, + "args": { + "External id": 502191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915774.556, "dur": 0.444, + "args": { + "External id": 502192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915776.631, "dur": 0.419, + "args": { + "External id": 502193,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915779.883, "dur": 0.390, + "args": { + "External id": 502194,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915781.257, "dur": 2.698, + "args": { + "External id": 502195,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915784.934, "dur": 0.461, + "args": { + "External id": 502196,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915788.493, "dur": 0.324, + "args": { + "External id": 502197,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915789.758, "dur": 0.378, + "args": { + "External id": 502198,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915791.424, "dur": 2.022, + "args": { + "External id": 502199,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915794.615, "dur": 0.283, + "args": { + "External id": 502200,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915796.127, "dur": 0.156, + "args": { + "External id": 502201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915799.191, "dur": 0.323, + "args": { + "External id": 502202,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915800.733, "dur": 2.739, + "args": { + "External id": 502203,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915804.515, "dur": 0.366, + "args": { + "External id": 502204,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915807.868, "dur": 0.340, + "args": { + "External id": 502205,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915809.242, "dur": 0.255, + "args": { + "External id": 502206,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915810.597, "dur": 2.082, + "args": { + "External id": 502207,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915813.809, "dur": 0.426, + "args": { + "External id": 502208,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915815.440, "dur": 0.260, + "args": { + "External id": 502209,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915818.651, "dur": 0.351, + "args": { + "External id": 502210,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915819.976, "dur": 2.618, + "args": { + "External id": 502211,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915823.544, "dur": 0.503, + "args": { + "External id": 502212,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915827.411, "dur": 0.244, + "args": { + "External id": 502213,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915828.804, "dur": 0.500, + "args": { + "External id": 502214,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915830.363, "dur": 2.359, + "args": { + "External id": 502215,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915833.988, "dur": 0.387, + "args": { + "External id": 502216,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915835.617, "dur": 0.492, + "args": { + "External id": 502217,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915839.359, "dur": 0.358, + "args": { + "External id": 502218,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915841.065, "dur": 2.517, + "args": { + "External id": 502219,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915844.692, "dur": 0.462, + "args": { + "External id": 502220,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915847.947, "dur": 0.252, + "args": { + "External id": 502221,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915849.529, "dur": 0.497, + "args": { + "External id": 502222,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915851.235, "dur": 2.185, + "args": { + "External id": 502223,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915854.453, "dur": 0.384, + "args": { + "External id": 502224,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915856.162, "dur": 0.407, + "args": { + "External id": 502225,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143915859.440, "dur": 0.260, + "args": { + "External id": 502226,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143915904.215, "dur": 143.274, + "args": { + "External id": 502227,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274143916174.305, "dur": 357.963, + "args": { + "External id": 502228,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "5", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 16408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143916190.272, "dur": 10.057, + "args": { + "External id": 502229,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274143916206.304, "dur": 16.338, + "args": { + "External id": 502230,"Record function id": 0, "Concrete Inputs": ["", "0", "709122560", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274143916213.701, "dur": 8.534, + "args": { + "External id": 502231,"Record function id": 0, "Concrete Inputs": ["", "0", "709122560", "850947072", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 16411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916217.638, "dur": 0.964, + "args": { + "External id": 502232,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "709122560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143916231.777, "dur": 108.808, + "args": { + "External id": 502233,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916234.305, "dur": 0.580, + "args": { + "External id": 502234,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "709122560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916236.276, "dur": 2.271, + "args": { + "External id": 502235,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "725506560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916239.740, "dur": 0.601, + "args": { + "External id": 502236,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "725507072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916241.594, "dur": 0.365, + "args": { + "External id": 502237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "727604224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916244.807, "dur": 0.268, + "args": { + "External id": 502238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "728128512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916246.374, "dur": 0.502, + "args": { + "External id": 502239,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "728652800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916247.699, "dur": 2.882, + "args": { + "External id": 502240,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "730749952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916253.752, "dur": 0.406, + "args": { + "External id": 502241,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "730750464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916255.305, "dur": 0.447, + "args": { + "External id": 502242,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "738090496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916256.918, "dur": 1.677, + "args": { + "External id": 502243,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "745430528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916259.663, "dur": 0.461, + "args": { + "External id": 502244,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "752770560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916261.338, "dur": 0.480, + "args": { + "External id": 502245,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "752771072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916264.533, "dur": 0.283, + "args": { + "External id": 502246,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "754868224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916266.251, "dur": 0.380, + "args": { + "External id": 502247,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "755392512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916267.700, "dur": 2.766, + "args": { + "External id": 502248,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "755916800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916273.176, "dur": 0.561, + "args": { + "External id": 502249,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "758013952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916274.777, "dur": 0.536, + "args": { + "External id": 502250,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "758014464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916276.428, "dur": 2.424, + "args": { + "External id": 502251,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "765354496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916279.879, "dur": 0.569, + "args": { + "External id": 502252,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "772694528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916281.395, "dur": 0.292, + "args": { + "External id": 502253,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "780034560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916284.641, "dur": 0.305, + "args": { + "External id": 502254,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "780035072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916286.264, "dur": 0.496, + "args": { + "External id": 502255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "782132224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916287.591, "dur": 2.206, + "args": { + "External id": 502256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "782656512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916292.567, "dur": 0.466, + "args": { + "External id": 502257,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "783180800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916294.249, "dur": 0.405, + "args": { + "External id": 502258,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "785277952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916295.840, "dur": 2.154, + "args": { + "External id": 502259,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "785278464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916298.998, "dur": 0.507, + "args": { + "External id": 502260,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "792618496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916300.645, "dur": 0.367, + "args": { + "External id": 502261,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "799958528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916303.694, "dur": 0.334, + "args": { + "External id": 502262,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "807298560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916305.205, "dur": 0.302, + "args": { + "External id": 502263,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "807299072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916306.448, "dur": 2.290, + "args": { + "External id": 502264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "809396224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916311.454, "dur": 0.532, + "args": { + "External id": 502265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "809920512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916312.951, "dur": 0.327, + "args": { + "External id": 502266,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "810444800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916314.308, "dur": 2.090, + "args": { + "External id": 502267,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "812541952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916317.639, "dur": 0.373, + "args": { + "External id": 502268,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "812542464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916318.857, "dur": 0.254, + "args": { + "External id": 502269,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "819882496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916321.789, "dur": 0.286, + "args": { + "External id": 502270,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "827222528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916323.221, "dur": 0.466, + "args": { + "External id": 502271,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "834562560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143916324.495, "dur": 2.385, + "args": { + "External id": 502272,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "834563072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143916390.644, "dur": 124.892, + "args": { + "External id": 508417,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274143916616.621, "dur": 383.904, + "args": { + "External id": 508418,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 16454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143916651.082, "dur": 343.566, + "args": { + "External id": 508419,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16455, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274143916663.905, "dur": 324.632, + "args": { + "External id": 508420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 16456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143917031.383, "dur": 2.464, + "args": { + "External id": 508421,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16457, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338711, "tid": 2338711, + "ts": 6274143917188.486, "dur": 12333.975, + "args": { + "External id": 508422,"Record function id": 0, "Ev Idx": 16458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917369.845, "dur": 7.354, + "args": { + "External id": 508423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 16459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917381.186, "dur": 1.122, + "args": { + "External id": 508424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917384.048, "dur": 1.072, + "args": { + "External id": 508425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917387.177, "dur": 1.085, + "args": { + "External id": 508426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917389.851, "dur": 0.973, + "args": { + "External id": 508427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917392.410, "dur": 1.204, + "args": { + "External id": 508428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917397.381, "dur": 0.812, + "args": { + "External id": 508429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917399.786, "dur": 4.962, + "args": { + "External id": 508430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917406.290, "dur": 1.045, + "args": { + "External id": 508431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917408.830, "dur": 1.111, + "args": { + "External id": 508432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917413.617, "dur": 0.908, + "args": { + "External id": 508433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917415.807, "dur": 0.932, + "args": { + "External id": 508434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917418.379, "dur": 0.680, + "args": { + "External id": 508435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917420.576, "dur": 0.722, + "args": { + "External id": 508436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917424.717, "dur": 0.854, + "args": { + "External id": 508437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917426.890, "dur": 4.078, + "args": { + "External id": 508438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917432.568, "dur": 1.025, + "args": { + "External id": 508439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917435.311, "dur": 0.561, + "args": { + "External id": 508440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917443.168, "dur": 0.890, + "args": { + "External id": 508441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917445.643, "dur": 0.660, + "args": { + "External id": 508442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917447.811, "dur": 0.827, + "args": { + "External id": 508443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917450.202, "dur": 0.881, + "args": { + "External id": 508444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917454.690, "dur": 0.742, + "args": { + "External id": 508445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917456.890, "dur": 4.202, + "args": { + "External id": 508446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917462.457, "dur": 0.831, + "args": { + "External id": 508447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917464.690, "dur": 0.709, + "args": { + "External id": 508448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917469.075, "dur": 0.924, + "args": { + "External id": 508449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917471.364, "dur": 0.698, + "args": { + "External id": 508450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917473.662, "dur": 0.711, + "args": { + "External id": 508451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917475.804, "dur": 0.778, + "args": { + "External id": 508452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917479.926, "dur": 0.821, + "args": { + "External id": 508453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917481.992, "dur": 4.170, + "args": { + "External id": 508454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917487.414, "dur": 0.763, + "args": { + "External id": 508455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917489.488, "dur": 0.939, + "args": { + "External id": 508456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917494.133, "dur": 0.866, + "args": { + "External id": 508457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917496.170, "dur": 0.947, + "args": { + "External id": 508458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917498.553, "dur": 0.968, + "args": { + "External id": 508459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917501.005, "dur": 0.819, + "args": { + "External id": 508460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917505.459, "dur": 1.264, + "args": { + "External id": 508461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143917507.923, "dur": 3.722, + "args": { + "External id": 508462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143917538.576, "dur": 11924.224, + "args": { + "External id": 508463,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143917561.303, "dur": 11890.718, + "args": { + "External id": 508464,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143917583.969, "dur": 42.335, + "args": { + "External id": 508465,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143917633.929, "dur": 11773.568, + "args": { + "External id": 508466,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 16502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143917636.460, "dur": 11770.361, + "args": { + "External id": 508467,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 16503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143917642.512, "dur": 7.179, + "args": { + "External id": 508468,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143917653.525, "dur": 11750.072, + "args": { + "External id": 508469,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 16505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274143929819.620, "dur": 37.359, + "args": { + "External id": 508470,"Record function id": 0, "Ev Idx": 16506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338711, "tid": 2338711, + "ts": 6274143929858.457, "dur": 254.865, + "args": { + "External id": 508471,"Record function id": 0, "Ev Idx": 16507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274143929904.044, "dur": 198.108, + "args": { + "External id": 508472,"Sequence number": 5516239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 16508 + } + }, + { + "ph": "s", "id": 448, "pid": 2338711, "tid": 2338711, "ts": 6274143929904.044, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338711, "tid": 2338711, + "ts": 6274143929976.589, "dur": 53.313, + "args": { + "External id": 508473,"kernel_hash": "cwgoxjzj7t5qtecsjcfbkrl7ttzfg44hx3z6lth7z6syiunc2ng5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wg/cwgoxjzj7t5qtecsjcfbkrl7ttzfg44hx3z6lth7z6syiunc2ng5.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 16509 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274143930183.368, "dur": 58.905, + "args": { + "External id": 508474,"Record function id": 0, "Ev Idx": 16510 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274143930252.350, "dur": 8222.288, + "args": { + "External id": 508475,"Record function id": 0, "Ev Idx": 16511 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274143930259.706, "dur": 929.552, + "args": { + "External id": 508476,"Record function id": 0, "Ev Idx": 16512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143930333.866, "dur": 10.740, + "args": { + "External id": 508477,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143930357.568, "dur": 38.817, + "args": { + "External id": 508478,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930365.439, "dur": 2.613, + "args": { + "External id": 508479,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930369.835, "dur": 0.604, + "args": { + "External id": 508480,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930371.578, "dur": 2.604, + "args": { + "External id": 508481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930375.366, "dur": 2.823, + "args": { + "External id": 508482,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930379.198, "dur": 0.253, + "args": { + "External id": 508483,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930382.455, "dur": 0.248, + "args": { + "External id": 508484,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930383.800, "dur": 0.446, + "args": { + "External id": 508485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930385.163, "dur": 0.438, + "args": { + "External id": 508486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930389.495, "dur": 0.464, + "args": { + "External id": 508487,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143930407.788, "dur": 69.329, + "args": { + "External id": 508488,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274143930509.728, "dur": 148.575, + "args": { + "External id": 508489,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143930521.588, "dur": 4.849, + "args": { + "External id": 508490,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274143930531.745, "dur": 10.596, + "args": { + "External id": 508491,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274143930536.322, "dur": 5.594, + "args": { + "External id": 508492,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930539.905, "dur": 0.715, + "args": { + "External id": 508493,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143930549.316, "dur": 30.030, + "args": { + "External id": 508494,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930551.128, "dur": 2.732, + "args": { + "External id": 508495,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930555.078, "dur": 0.749, + "args": { + "External id": 508496,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930558.960, "dur": 0.457, + "args": { + "External id": 508497,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930560.440, "dur": 0.540, + "args": { + "External id": 508498,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930562.256, "dur": 2.629, + "args": { + "External id": 508499,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930565.748, "dur": 0.632, + "args": { + "External id": 508500,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930567.747, "dur": 0.383, + "args": { + "External id": 508501,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930570.879, "dur": 0.520, + "args": { + "External id": 508502,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143930572.391, "dur": 2.719, + "args": { + "External id": 508503,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143930608.231, "dur": 39.800, + "args": { + "External id": 508504,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274143930713.467, "dur": 330.884, + "args": { + "External id": 508505,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143930745.511, "dur": 294.053, + "args": { + "External id": 508506,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16542, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274143930755.274, "dur": 278.490, + "args": { + "External id": 508507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143931109.157, "dur": 3.324, + "args": { + "External id": 508508,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16544, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274143931210.873, "dur": 7069.442, + "args": { + "External id": 508509,"Record function id": 0, "Ev Idx": 16545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931309.073, "dur": 7.070, + "args": { + "External id": 508510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931319.809, "dur": 1.220, + "args": { + "External id": 508511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931322.621, "dur": 1.042, + "args": { + "External id": 508512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931325.425, "dur": 1.252, + "args": { + "External id": 508513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931327.952, "dur": 1.121, + "args": { + "External id": 508514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931332.963, "dur": 0.977, + "args": { + "External id": 508515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931335.593, "dur": 0.985, + "args": { + "External id": 508516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931338.087, "dur": 4.451, + "args": { + "External id": 508517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931343.798, "dur": 0.681, + "args": { + "External id": 508518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143931348.340, "dur": 0.946, + "args": { + "External id": 508519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143931376.421, "dur": 6864.229, + "args": { + "External id": 508520,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143931392.626, "dur": 6840.616, + "args": { + "External id": 508521,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143931407.215, "dur": 15.170, + "args": { + "External id": 508522,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143931426.272, "dur": 6769.108, + "args": { + "External id": 508523,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143931429.007, "dur": 6765.789, + "args": { + "External id": 508524,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143931434.866, "dur": 5.544, + "args": { + "External id": 508525,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143931442.388, "dur": 6749.067, + "args": { + "External id": 508526,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274143938421.519, "dur": 29.771, + "args": { + "External id": 508527,"Sequence number": 5516240, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16563 + } + }, + { + "ph": "s", "id": 447, "pid": 2338711, "tid": 2338711, "ts": 6274143938421.519, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274143938438.522, "dur": 8.167, + "args": { + "External id": 508528,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143938442.367, "dur": 4.098, + "args": { + "External id": 508529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274143938516.666, "dur": 111.785, + "args": { + "External id": 508530,"Record function id": 0, "Ev Idx": 16566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274143938631.614, "dur": 1144.633, + "args": { + "External id": 508531,"Record function id": 0, "Ev Idx": 16567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274143938672.575, "dur": 1089.754, + "args": { + "External id": 508532,"Sequence number": 5516241, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16568 + } + }, + { + "ph": "s", "id": 446, "pid": 2338711, "tid": 2338711, "ts": 6274143938672.575, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274143938746.906, "dur": 50.367, + "args": { + "External id": 508533,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143938811.171, "dur": 110.149, + "args": { + "External id": 508534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143938929.946, "dur": 37.235, + "args": { + "External id": 508535,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143938977.628, "dur": 30.213, + "args": { + "External id": 508536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274143939036.584, "dur": 61.126, + "args": { + "External id": 508537,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274143939122.959, "dur": 21.756, + "args": { + "External id": 508538,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274143939167.716, "dur": 138.969, + "args": { + "External id": 508539,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274143939224.940, "dur": 14.486, + "args": { + "External id": 508540,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143939230.445, "dur": 8.157, + "args": { + "External id": 508541,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143939242.202, "dur": 3.788, + "args": { + "External id": 508542,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143939247.323, "dur": 1.265, + "args": { + "External id": 508543,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143939251.217, "dur": 2.952, + "args": { + "External id": 508544,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143939318.066, "dur": 54.305, + "args": { + "External id": 508545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274143939402.771, "dur": 29.434, + "args": { + "External id": 508546,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143939441.177, "dur": 41.241, + "args": { + "External id": 508547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143939490.642, "dur": 34.377, + "args": { + "External id": 508548,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274143939547.965, "dur": 24.403, + "args": { + "External id": 508549,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143939580.279, "dur": 56.623, + "args": { + "External id": 508550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274143939659.770, "dur": 19.421, + "args": { + "External id": 508551,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16587 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338711, "tid": 2338711, + "ts": 6274143939845.908, "dur": 86.806, + "args": { + "External id": 508552,"Record function id": 0, "Ev Idx": 16588 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274143940006.409, "dur": 84.154, + "args": { + "External id": 508553,"Record function id": 0, "Ev Idx": 16589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274143940102.525, "dur": 29477.473, + "args": { + "External id": 508554,"Record function id": 0, "Ev Idx": 16590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274143940111.643, "dur": 980.668, + "args": { + "External id": 508555,"Record function id": 0, "Ev Idx": 16591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143940194.917, "dur": 10.374, + "args": { + "External id": 508556,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143940219.156, "dur": 39.675, + "args": { + "External id": 508557,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940224.764, "dur": 4.455, + "args": { + "External id": 508558,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940231.431, "dur": 0.614, + "args": { + "External id": 508559,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940232.992, "dur": 0.701, + "args": { + "External id": 508560,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940237.041, "dur": 0.417, + "args": { + "External id": 508561,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940238.414, "dur": 0.518, + "args": { + "External id": 508562,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940240.078, "dur": 0.357, + "args": { + "External id": 508563,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940243.853, "dur": 3.021, + "args": { + "External id": 508564,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940248.074, "dur": 0.622, + "args": { + "External id": 508565,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940249.526, "dur": 2.405, + "args": { + "External id": 508566,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143940278.732, "dur": 77.273, + "args": { + "External id": 508567,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274143940390.869, "dur": 121.002, + "args": { + "External id": 508568,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143940401.617, "dur": 6.347, + "args": { + "External id": 508569,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274143940413.481, "dur": 10.746, + "args": { + "External id": 508570,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274143940418.280, "dur": 5.557, + "args": { + "External id": 508571,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940421.985, "dur": 0.512, + "args": { + "External id": 508572,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143940430.626, "dur": 26.448, + "args": { + "External id": 508573,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940432.247, "dur": 0.552, + "args": { + "External id": 508574,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940434.072, "dur": 2.547, + "args": { + "External id": 508575,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940437.694, "dur": 0.573, + "args": { + "External id": 508576,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940439.272, "dur": 2.562, + "args": { + "External id": 508577,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940444.641, "dur": 0.511, + "args": { + "External id": 508578,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940446.219, "dur": 0.347, + "args": { + "External id": 508579,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940447.648, "dur": 0.310, + "args": { + "External id": 508580,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940450.547, "dur": 0.470, + "args": { + "External id": 508581,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143940451.792, "dur": 0.627, + "args": { + "External id": 508582,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143940469.096, "dur": 34.602, + "args": { + "External id": 508583,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274143940568.192, "dur": 388.239, + "args": { + "External id": 508584,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143940619.151, "dur": 331.325, + "args": { + "External id": 508585,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16621, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274143940630.726, "dur": 314.386, + "args": { + "External id": 508586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143940983.610, "dur": 2.160, + "args": { + "External id": 508587,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16623, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274143941117.485, "dur": 28260.748, + "args": { + "External id": 508588,"Record function id": 0, "Ev Idx": 16624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941219.636, "dur": 6.677, + "args": { + "External id": 508589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941230.175, "dur": 1.191, + "args": { + "External id": 508590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941233.187, "dur": 3.160, + "args": { + "External id": 508591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941238.160, "dur": 0.899, + "args": { + "External id": 508592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941240.693, "dur": 0.722, + "args": { + "External id": 508593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941244.791, "dur": 1.101, + "args": { + "External id": 508594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941247.225, "dur": 0.724, + "args": { + "External id": 508595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941249.194, "dur": 2.633, + "args": { + "External id": 508596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941253.281, "dur": 0.841, + "args": { + "External id": 508597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143941257.379, "dur": 0.787, + "args": { + "External id": 508598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143941276.552, "dur": 28056.107, + "args": { + "External id": 508599,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143941292.535, "dur": 28031.850, + "args": { + "External id": 508600,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143941309.698, "dur": 16.857, + "args": { + "External id": 508601,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143941330.130, "dur": 27957.631, + "args": { + "External id": 508602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143941332.922, "dur": 27954.245, + "args": { + "External id": 508603,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143941338.767, "dur": 5.127, + "args": { + "External id": 508604,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143941345.682, "dur": 27938.207, + "args": { + "External id": 508605,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274143969518.329, "dur": 36.004, + "args": { + "External id": 508606,"Sequence number": 5516242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16642 + } + }, + { + "ph": "s", "id": 445, "pid": 2338711, "tid": 2338711, "ts": 6274143969518.329, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274143969537.993, "dur": 11.517, + "args": { + "External id": 508607,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143969544.980, "dur": 4.277, + "args": { + "External id": 508608,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274143969636.065, "dur": 78.227, + "args": { + "External id": 508609,"Record function id": 0, "Ev Idx": 16645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274143969716.113, "dur": 1128.870, + "args": { + "External id": 508610,"Record function id": 0, "Ev Idx": 16646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274143969755.577, "dur": 1075.899, + "args": { + "External id": 508611,"Sequence number": 5516243, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16647 + } + }, + { + "ph": "s", "id": 444, "pid": 2338711, "tid": 2338711, "ts": 6274143969755.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274143969831.940, "dur": 49.827, + "args": { + "External id": 508612,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143969894.792, "dur": 105.123, + "args": { + "External id": 508613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143970013.177, "dur": 78.821, + "args": { + "External id": 508614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143970102.868, "dur": 34.928, + "args": { + "External id": 508615,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274143970165.419, "dur": 24.013, + "args": { + "External id": 508616,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274143970210.050, "dur": 16.363, + "args": { + "External id": 508617,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274143970251.383, "dur": 137.338, + "args": { + "External id": 508618,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274143970303.741, "dur": 12.851, + "args": { + "External id": 508619,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143970309.390, "dur": 6.355, + "args": { + "External id": 508620,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143970319.505, "dur": 4.100, + "args": { + "External id": 508621,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143970325.197, "dur": 1.405, + "args": { + "External id": 508622,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143970331.183, "dur": 5.482, + "args": { + "External id": 508623,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143970399.086, "dur": 48.853, + "args": { + "External id": 508624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274143970477.681, "dur": 28.856, + "args": { + "External id": 508625,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143970516.619, "dur": 42.835, + "args": { + "External id": 508626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143970565.741, "dur": 52.190, + "args": { + "External id": 508627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274143970644.598, "dur": 27.977, + "args": { + "External id": 508628,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274143970678.962, "dur": 39.682, + "args": { + "External id": 508629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274143970739.568, "dur": 19.171, + "args": { + "External id": 508630,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338711, "tid": 2338711, + "ts": 6274143970911.993, "dur": 81.098, + "args": { + "External id": 508631,"Record function id": 0, "Ev Idx": 16667 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274143971109.005, "dur": 51.072, + "args": { + "External id": 508632,"Record function id": 0, "Ev Idx": 16668 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274143971169.873, "dur": 37059.411, + "args": { + "External id": 508633,"Record function id": 0, "Ev Idx": 16669 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274143971177.902, "dur": 948.564, + "args": { + "External id": 508634,"Record function id": 0, "Ev Idx": 16670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143971257.032, "dur": 9.672, + "args": { + "External id": 508635,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143971279.711, "dur": 40.681, + "args": { + "External id": 508636,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971286.173, "dur": 2.287, + "args": { + "External id": 508637,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971292.769, "dur": 0.342, + "args": { + "External id": 508638,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971294.275, "dur": 0.731, + "args": { + "External id": 508639,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971295.958, "dur": 0.489, + "args": { + "External id": 508640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971300.200, "dur": 0.463, + "args": { + "External id": 508641,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971302.155, "dur": 0.601, + "args": { + "External id": 508642,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971304.591, "dur": 3.862, + "args": { + "External id": 508643,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971309.820, "dur": 0.297, + "args": { + "External id": 508644,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971311.783, "dur": 0.255, + "args": { + "External id": 508645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143971330.743, "dur": 55.507, + "args": { + "External id": 508646,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274143971419.357, "dur": 133.893, + "args": { + "External id": 508647,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143971430.684, "dur": 4.938, + "args": { + "External id": 508648,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274143971441.203, "dur": 10.728, + "args": { + "External id": 508649,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274143971445.875, "dur": 5.679, + "args": { + "External id": 508650,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971449.768, "dur": 0.563, + "args": { + "External id": 508651,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274143971459.648, "dur": 35.148, + "args": { + "External id": 508652,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971461.681, "dur": 2.660, + "args": { + "External id": 508653,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971466.000, "dur": 0.458, + "args": { + "External id": 508654,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971468.236, "dur": 0.560, + "args": { + "External id": 508655,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971472.702, "dur": 2.287, + "args": { + "External id": 508656,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971476.640, "dur": 0.514, + "args": { + "External id": 508657,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971478.263, "dur": 0.419, + "args": { + "External id": 508658,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971482.991, "dur": 0.316, + "args": { + "External id": 508659,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971484.665, "dur": 0.266, + "args": { + "External id": 508660,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143971487.171, "dur": 2.099, + "args": { + "External id": 508661,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143971509.915, "dur": 35.504, + "args": { + "External id": 508662,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274143971625.294, "dur": 363.990, + "args": { + "External id": 508663,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143971655.955, "dur": 328.310, + "args": { + "External id": 508664,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16700, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274143971669.724, "dur": 309.008, + "args": { + "External id": 508665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274143972012.486, "dur": 2.073, + "args": { + "External id": 508666,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16702, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274143972150.649, "dur": 35857.334, + "args": { + "External id": 508667,"Record function id": 0, "Ev Idx": 16703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972256.318, "dur": 6.883, + "args": { + "External id": 508668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972267.130, "dur": 1.036, + "args": { + "External id": 508669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972270.039, "dur": 3.729, + "args": { + "External id": 508670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972275.502, "dur": 0.892, + "args": { + "External id": 508671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972277.710, "dur": 1.204, + "args": { + "External id": 508672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972280.614, "dur": 1.045, + "args": { + "External id": 508673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972285.144, "dur": 1.210, + "args": { + "External id": 508674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972287.788, "dur": 2.484, + "args": { + "External id": 508675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972291.723, "dur": 0.843, + "args": { + "External id": 508676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274143972293.968, "dur": 0.715, + "args": { + "External id": 508677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143972315.758, "dur": 35643.878, + "args": { + "External id": 508678,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143972332.740, "dur": 35619.165, + "args": { + "External id": 508679,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274143972348.479, "dur": 16.017, + "args": { + "External id": 508680,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274143972368.146, "dur": 35547.249, + "args": { + "External id": 508681,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274143972370.848, "dur": 35543.916, + "args": { + "External id": 508682,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274143972377.508, "dur": 5.394, + "args": { + "External id": 508683,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274143972384.693, "dur": 35526.597, + "args": { + "External id": 508684,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144008172.105, "dur": 31.429, + "args": { + "External id": 508685,"Sequence number": 5516244, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16721 + } + }, + { + "ph": "s", "id": 443, "pid": 2338711, "tid": 2338711, "ts": 6274144008172.105, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144008189.805, "dur": 8.925, + "args": { + "External id": 508686,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144008193.823, "dur": 4.553, + "args": { + "External id": 508687,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144008270.656, "dur": 73.929, + "args": { + "External id": 508688,"Record function id": 0, "Ev Idx": 16724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144008346.093, "dur": 1136.107, + "args": { + "External id": 508689,"Record function id": 0, "Ev Idx": 16725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144008387.946, "dur": 1079.414, + "args": { + "External id": 508690,"Sequence number": 5516245, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16726 + } + }, + { + "ph": "s", "id": 442, "pid": 2338711, "tid": 2338711, "ts": 6274144008387.946, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144008454.098, "dur": 49.120, + "args": { + "External id": 508691,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144008516.005, "dur": 125.325, + "args": { + "External id": 508692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144008656.539, "dur": 42.814, + "args": { + "External id": 508693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144008708.375, "dur": 31.469, + "args": { + "External id": 508694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144008768.335, "dur": 25.786, + "args": { + "External id": 508695,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144008814.513, "dur": 17.760, + "args": { + "External id": 508696,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144008855.918, "dur": 136.075, + "args": { + "External id": 508697,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144008909.684, "dur": 11.811, + "args": { + "External id": 508698,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144008915.466, "dur": 5.236, + "args": { + "External id": 508699,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144008924.490, "dur": 3.778, + "args": { + "External id": 508700,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144008929.813, "dur": 0.951, + "args": { + "External id": 508701,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144008933.111, "dur": 6.938, + "args": { + "External id": 508702,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144009003.055, "dur": 48.640, + "args": { + "External id": 508703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144009126.637, "dur": 32.507, + "args": { + "External id": 508704,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144009170.042, "dur": 47.277, + "args": { + "External id": 508705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144009226.826, "dur": 35.527, + "args": { + "External id": 508706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144009286.576, "dur": 25.425, + "args": { + "External id": 508707,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144009320.210, "dur": 36.485, + "args": { + "External id": 508708,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144009374.424, "dur": 18.298, + "args": { + "External id": 508709,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338711, "tid": 2338711, + "ts": 6274144009555.320, "dur": 105.455, + "args": { + "External id": 508710,"Record function id": 0, "Ev Idx": 16746 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144009739.069, "dur": 48.064, + "args": { + "External id": 508711,"Record function id": 0, "Ev Idx": 16747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274144009796.151, "dur": 32202.225, + "args": { + "External id": 508712,"Record function id": 0, "Ev Idx": 16748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274144009804.235, "dur": 972.363, + "args": { + "External id": 508713,"Record function id": 0, "Ev Idx": 16749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144009885.725, "dur": 9.357, + "args": { + "External id": 508714,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144009909.362, "dur": 39.834, + "args": { + "External id": 508715,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009914.991, "dur": 2.403, + "args": { + "External id": 508716,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009922.349, "dur": 0.454, + "args": { + "External id": 508717,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009923.959, "dur": 0.577, + "args": { + "External id": 508718,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009926.177, "dur": 0.586, + "args": { + "External id": 508719,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009929.887, "dur": 0.489, + "args": { + "External id": 508720,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009932.102, "dur": 0.565, + "args": { + "External id": 508721,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009933.860, "dur": 4.934, + "args": { + "External id": 508722,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009940.491, "dur": 0.374, + "args": { + "External id": 508723,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144009942.114, "dur": 0.379, + "args": { + "External id": 508724,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144009961.690, "dur": 58.273, + "args": { + "External id": 508725,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144010089.438, "dur": 137.362, + "args": { + "External id": 508726,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144010103.270, "dur": 5.381, + "args": { + "External id": 508727,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144010114.743, "dur": 12.425, + "args": { + "External id": 508728,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144010119.965, "dur": 6.813, + "args": { + "External id": 508729,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010124.343, "dur": 0.710, + "args": { + "External id": 508730,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144010135.445, "dur": 32.222, + "args": { + "External id": 508731,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010137.760, "dur": 2.843, + "args": { + "External id": 508732,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010142.321, "dur": 0.549, + "args": { + "External id": 508733,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010143.978, "dur": 0.556, + "args": { + "External id": 508734,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010148.303, "dur": 2.876, + "args": { + "External id": 508735,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010152.102, "dur": 0.576, + "args": { + "External id": 508736,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010154.115, "dur": 0.452, + "args": { + "External id": 508737,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010157.242, "dur": 0.502, + "args": { + "External id": 508738,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010159.100, "dur": 0.341, + "args": { + "External id": 508739,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144010160.807, "dur": 2.196, + "args": { + "External id": 508740,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144010183.485, "dur": 34.573, + "args": { + "External id": 508741,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144010283.925, "dur": 386.188, + "args": { + "External id": 508742,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144010317.855, "dur": 346.228, + "args": { + "External id": 508743,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16779, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144010328.259, "dur": 329.576, + "args": { + "External id": 508744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144010698.995, "dur": 2.317, + "args": { + "External id": 508745,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16781, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274144010797.686, "dur": 31007.577, + "args": { + "External id": 508746,"Record function id": 0, "Ev Idx": 16782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010895.259, "dur": 6.373, + "args": { + "External id": 508747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010905.287, "dur": 1.357, + "args": { + "External id": 508748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010908.487, "dur": 3.892, + "args": { + "External id": 508749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010914.008, "dur": 1.129, + "args": { + "External id": 508750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010916.652, "dur": 1.511, + "args": { + "External id": 508751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010919.711, "dur": 1.187, + "args": { + "External id": 508752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010924.623, "dur": 0.998, + "args": { + "External id": 508753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010927.399, "dur": 3.038, + "args": { + "External id": 508754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010932.075, "dur": 0.638, + "args": { + "External id": 508755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144010934.417, "dur": 0.597, + "args": { + "External id": 508756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144010956.043, "dur": 30804.908, + "args": { + "External id": 508757,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144010971.921, "dur": 30781.271, + "args": { + "External id": 508758,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144010987.116, "dur": 16.265, + "args": { + "External id": 508759,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144011006.911, "dur": 30710.461, + "args": { + "External id": 508760,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144011009.770, "dur": 30706.908, + "args": { + "External id": 508761,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144011015.604, "dur": 6.174, + "args": { + "External id": 508762,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144011023.461, "dur": 30689.868, + "args": { + "External id": 508763,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144041941.053, "dur": 32.358, + "args": { + "External id": 508764,"Sequence number": 5516246, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16800 + } + }, + { + "ph": "s", "id": 441, "pid": 2338711, "tid": 2338711, "ts": 6274144041941.053, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144041959.756, "dur": 8.751, + "args": { + "External id": 508765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144041963.960, "dur": 4.314, + "args": { + "External id": 508766,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144042039.059, "dur": 99.638, + "args": { + "External id": 508767,"Record function id": 0, "Ev Idx": 16803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144042141.580, "dur": 1146.840, + "args": { + "External id": 508768,"Record function id": 0, "Ev Idx": 16804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144042182.388, "dur": 1093.095, + "args": { + "External id": 508769,"Sequence number": 5516247, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16805 + } + }, + { + "ph": "s", "id": 440, "pid": 2338711, "tid": 2338711, "ts": 6274144042182.388, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144042251.949, "dur": 55.039, + "args": { + "External id": 508770,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144042322.588, "dur": 107.826, + "args": { + "External id": 508771,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144042441.409, "dur": 37.989, + "args": { + "External id": 508772,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144042488.001, "dur": 29.551, + "args": { + "External id": 508773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144042547.107, "dur": 28.124, + "args": { + "External id": 508774,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144042612.637, "dur": 21.395, + "args": { + "External id": 508775,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144042658.379, "dur": 140.389, + "args": { + "External id": 508776,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144042710.310, "dur": 15.446, + "args": { + "External id": 508777,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144042716.117, "dur": 8.880, + "args": { + "External id": 508778,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144042728.961, "dur": 3.936, + "args": { + "External id": 508779,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144042734.188, "dur": 0.940, + "args": { + "External id": 508780,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144042738.152, "dur": 5.564, + "args": { + "External id": 508781,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144042810.059, "dur": 54.865, + "args": { + "External id": 508782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144042899.785, "dur": 30.639, + "args": { + "External id": 508783,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144042940.734, "dur": 41.254, + "args": { + "External id": 508784,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144042991.036, "dur": 35.834, + "args": { + "External id": 508785,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144043047.981, "dur": 61.742, + "args": { + "External id": 508786,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144043119.882, "dur": 41.713, + "args": { + "External id": 508787,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144043183.750, "dur": 21.026, + "args": { + "External id": 508788,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16824 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338711, "tid": 2338711, + "ts": 6274144043356.418, "dur": 80.279, + "args": { + "External id": 508789,"Record function id": 0, "Ev Idx": 16825 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144043512.539, "dur": 48.355, + "args": { + "External id": 508790,"Record function id": 0, "Ev Idx": 16826 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274144043569.191, "dur": 29298.962, + "args": { + "External id": 508791,"Record function id": 0, "Ev Idx": 16827 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274144043578.286, "dur": 960.526, + "args": { + "External id": 508792,"Record function id": 0, "Ev Idx": 16828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144043676.379, "dur": 10.530, + "args": { + "External id": 508793,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144043702.273, "dur": 41.902, + "args": { + "External id": 508794,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043708.020, "dur": 2.480, + "args": { + "External id": 508795,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043715.860, "dur": 0.400, + "args": { + "External id": 508796,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043717.745, "dur": 0.690, + "args": { + "External id": 508797,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043719.753, "dur": 0.569, + "args": { + "External id": 508798,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043723.622, "dur": 0.563, + "args": { + "External id": 508799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043725.808, "dur": 0.748, + "args": { + "External id": 508800,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043728.287, "dur": 5.144, + "args": { + "External id": 508801,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043734.829, "dur": 0.408, + "args": { + "External id": 508802,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043736.702, "dur": 0.703, + "args": { + "External id": 508803,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144043756.674, "dur": 54.447, + "args": { + "External id": 508804,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144043848.480, "dur": 120.154, + "args": { + "External id": 508805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144043859.696, "dur": 5.231, + "args": { + "External id": 508806,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144043870.192, "dur": 11.043, + "args": { + "External id": 508807,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144043874.886, "dur": 5.803, + "args": { + "External id": 508808,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043878.813, "dur": 0.613, + "args": { + "External id": 508809,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144043887.768, "dur": 29.366, + "args": { + "External id": 508810,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043889.868, "dur": 0.376, + "args": { + "External id": 508811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043891.692, "dur": 2.157, + "args": { + "External id": 508812,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043895.048, "dur": 0.384, + "args": { + "External id": 508813,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043896.591, "dur": 2.500, + "args": { + "External id": 508814,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043902.681, "dur": 0.317, + "args": { + "External id": 508815,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043904.525, "dur": 0.389, + "args": { + "External id": 508816,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043906.762, "dur": 0.452, + "args": { + "External id": 508817,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043910.257, "dur": 0.462, + "args": { + "External id": 508818,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144043912.196, "dur": 0.263, + "args": { + "External id": 508819,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144043927.720, "dur": 33.089, + "args": { + "External id": 508820,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144044027.261, "dur": 408.713, + "args": { + "External id": 508821,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144044099.165, "dur": 330.687, + "args": { + "External id": 508822,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16858, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144044111.600, "dur": 312.760, + "args": { + "External id": 508823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144044461.614, "dur": 2.436, + "args": { + "External id": 508824,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16860, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274144044559.855, "dur": 28120.217, + "args": { + "External id": 508825,"Record function id": 0, "Ev Idx": 16861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044682.548, "dur": 6.745, + "args": { + "External id": 508826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044693.263, "dur": 0.884, + "args": { + "External id": 508827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044695.714, "dur": 3.659, + "args": { + "External id": 508828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044701.552, "dur": 0.998, + "args": { + "External id": 508829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044704.305, "dur": 1.014, + "args": { + "External id": 508830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044706.983, "dur": 0.876, + "args": { + "External id": 508831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044711.602, "dur": 0.800, + "args": { + "External id": 508832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044714.175, "dur": 2.080, + "args": { + "External id": 508833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044717.619, "dur": 0.924, + "args": { + "External id": 508834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144044720.336, "dur": 0.768, + "args": { + "External id": 508835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144044743.045, "dur": 27893.526, + "args": { + "External id": 508836,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144044759.236, "dur": 27869.302, + "args": { + "External id": 508837,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144044774.559, "dur": 16.108, + "args": { + "External id": 508838,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144044794.162, "dur": 27785.743, + "args": { + "External id": 508839,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144044797.114, "dur": 27782.110, + "args": { + "External id": 508840,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144044803.715, "dur": 5.567, + "args": { + "External id": 508841,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144044811.076, "dur": 27764.703, + "args": { + "External id": 508842,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144072810.916, "dur": 30.118, + "args": { + "External id": 508843,"Sequence number": 5516248, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16879 + } + }, + { + "ph": "s", "id": 439, "pid": 2338711, "tid": 2338711, "ts": 6274144072810.916, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144072828.243, "dur": 8.363, + "args": { + "External id": 508844,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144072832.068, "dur": 4.301, + "args": { + "External id": 508845,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144072911.723, "dur": 73.041, + "args": { + "External id": 508846,"Record function id": 0, "Ev Idx": 16882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144072986.166, "dur": 1173.658, + "args": { + "External id": 508847,"Record function id": 0, "Ev Idx": 16883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144073023.716, "dur": 1122.011, + "args": { + "External id": 508848,"Sequence number": 5516249, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16884 + } + }, + { + "ph": "s", "id": 438, "pid": 2338711, "tid": 2338711, "ts": 6274144073023.716, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144073124.453, "dur": 52.919, + "args": { + "External id": 508849,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144073191.383, "dur": 108.274, + "args": { + "External id": 508850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144073311.063, "dur": 37.538, + "args": { + "External id": 508851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144073357.416, "dur": 29.155, + "args": { + "External id": 508852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144073417.344, "dur": 28.154, + "args": { + "External id": 508853,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144073464.446, "dur": 22.371, + "args": { + "External id": 508854,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144073511.594, "dur": 170.326, + "args": { + "External id": 508855,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144073568.417, "dur": 11.752, + "args": { + "External id": 508856,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144073573.975, "dur": 5.433, + "args": { + "External id": 508857,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144073583.136, "dur": 27.029, + "args": { + "External id": 508858,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144073612.547, "dur": 1.202, + "args": { + "External id": 508859,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144073616.860, "dur": 7.575, + "args": { + "External id": 508860,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144073696.963, "dur": 53.589, + "args": { + "External id": 508861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144073786.256, "dur": 30.120, + "args": { + "External id": 508862,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144073826.036, "dur": 42.159, + "args": { + "External id": 508863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144073877.055, "dur": 34.760, + "args": { + "External id": 508864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144073935.519, "dur": 25.073, + "args": { + "External id": 508865,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144073968.435, "dur": 35.086, + "args": { + "External id": 508866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144074022.071, "dur": 17.365, + "args": { + "External id": 508867,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16903 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338711, "tid": 2338711, + "ts": 6274144074230.511, "dur": 83.662, + "args": { + "External id": 508868,"Record function id": 0, "Ev Idx": 16904 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144074387.073, "dur": 46.904, + "args": { + "External id": 508869,"Record function id": 0, "Ev Idx": 16905 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274144074441.948, "dur": 30422.207, + "args": { + "External id": 508870,"Record function id": 0, "Ev Idx": 16906 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274144074450.353, "dur": 1027.931, + "args": { + "External id": 508871,"Record function id": 0, "Ev Idx": 16907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144074529.668, "dur": 9.399, + "args": { + "External id": 508872,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144074552.572, "dur": 66.534, + "args": { + "External id": 508873,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074558.378, "dur": 2.393, + "args": { + "External id": 508874,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074565.117, "dur": 0.430, + "args": { + "External id": 508875,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074567.133, "dur": 0.600, + "args": { + "External id": 508876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074569.597, "dur": 0.622, + "args": { + "External id": 508877,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074573.926, "dur": 0.757, + "args": { + "External id": 508878,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074576.431, "dur": 0.678, + "args": { + "External id": 508879,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074578.302, "dur": 4.666, + "args": { + "External id": 508880,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074584.665, "dur": 0.533, + "args": { + "External id": 508881,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074607.694, "dur": 0.905, + "args": { + "External id": 508882,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144074631.029, "dur": 60.224, + "args": { + "External id": 508883,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144074727.947, "dur": 137.050, + "args": { + "External id": 508884,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144074742.991, "dur": 5.457, + "args": { + "External id": 508885,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144074753.825, "dur": 12.727, + "args": { + "External id": 508886,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144074758.229, "dur": 7.872, + "args": { + "External id": 508887,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074762.277, "dur": 2.678, + "args": { + "External id": 508888,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144074773.809, "dur": 34.477, + "args": { + "External id": 508889,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074775.923, "dur": 0.543, + "args": { + "External id": 508890,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074778.423, "dur": 0.620, + "args": { + "External id": 508891,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074780.370, "dur": 0.722, + "args": { + "External id": 508892,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074785.350, "dur": 3.098, + "args": { + "External id": 508893,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074789.385, "dur": 3.674, + "args": { + "External id": 508894,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074794.895, "dur": 1.047, + "args": { + "External id": 508895,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074797.164, "dur": 0.647, + "args": { + "External id": 508896,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074799.434, "dur": 0.315, + "args": { + "External id": 508897,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144074803.314, "dur": 0.562, + "args": { + "External id": 508898,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144074821.836, "dur": 35.060, + "args": { + "External id": 508899,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144074919.234, "dur": 447.237, + "args": { + "External id": 508900,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144074950.850, "dur": 408.984, + "args": { + "External id": 508901,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16937, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144074961.261, "dur": 392.078, + "args": { + "External id": 508902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144075397.500, "dur": 2.739, + "args": { + "External id": 508903,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16939, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274144075501.045, "dur": 29162.960, + "args": { + "External id": 508904,"Record function id": 0, "Ev Idx": 16940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075624.696, "dur": 7.779, + "args": { + "External id": 508905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075636.798, "dur": 1.178, + "args": { + "External id": 508906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075639.785, "dur": 3.288, + "args": { + "External id": 508907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075645.171, "dur": 0.993, + "args": { + "External id": 508908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075647.724, "dur": 1.066, + "args": { + "External id": 508909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075652.514, "dur": 0.853, + "args": { + "External id": 508910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075655.183, "dur": 1.040, + "args": { + "External id": 508911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075657.816, "dur": 2.563, + "args": { + "External id": 508912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075661.877, "dur": 0.990, + "args": { + "External id": 508913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144075666.439, "dur": 0.961, + "args": { + "External id": 508914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144075687.146, "dur": 28930.173, + "args": { + "External id": 508915,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144075704.267, "dur": 28905.036, + "args": { + "External id": 508916,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144075721.323, "dur": 15.844, + "args": { + "External id": 508917,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144075740.703, "dur": 28823.173, + "args": { + "External id": 508918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144075743.535, "dur": 28819.679, + "args": { + "External id": 508919,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144075750.294, "dur": 6.265, + "args": { + "External id": 508920,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144075758.509, "dur": 28801.071, + "args": { + "External id": 508921,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144104799.856, "dur": 33.104, + "args": { + "External id": 508922,"Sequence number": 5516250, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16958 + } + }, + { + "ph": "s", "id": 437, "pid": 2338711, "tid": 2338711, "ts": 6274144104799.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144104819.151, "dur": 8.974, + "args": { + "External id": 508923,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144104823.303, "dur": 4.576, + "args": { + "External id": 508924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144104905.671, "dur": 71.693, + "args": { + "External id": 508925,"Record function id": 0, "Ev Idx": 16961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144104978.698, "dur": 1214.034, + "args": { + "External id": 508926,"Record function id": 0, "Ev Idx": 16962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144105018.756, "dur": 1159.610, + "args": { + "External id": 508927,"Sequence number": 5516251, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16963 + } + }, + { + "ph": "s", "id": 436, "pid": 2338711, "tid": 2338711, "ts": 6274144105018.756, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144105129.958, "dur": 55.135, + "args": { + "External id": 508928,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144105229.394, "dur": 103.519, + "args": { + "External id": 508929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144105346.301, "dur": 37.293, + "args": { + "External id": 508930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144105392.795, "dur": 30.341, + "args": { + "External id": 508931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144105451.360, "dur": 27.148, + "args": { + "External id": 508932,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144105500.043, "dur": 19.143, + "args": { + "External id": 508933,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144105542.578, "dur": 166.672, + "args": { + "External id": 508934,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144105616.820, "dur": 15.330, + "args": { + "External id": 508935,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144105622.330, "dur": 5.740, + "args": { + "External id": 508936,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144105634.808, "dur": 4.588, + "args": { + "External id": 508937,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144105640.510, "dur": 1.280, + "args": { + "External id": 508938,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144105644.310, "dur": 7.602, + "args": { + "External id": 508939,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144105720.614, "dur": 54.244, + "args": { + "External id": 508940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144105808.067, "dur": 31.660, + "args": { + "External id": 508941,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144105850.354, "dur": 41.988, + "args": { + "External id": 508942,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144105899.445, "dur": 34.980, + "args": { + "External id": 508943,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144105958.564, "dur": 24.504, + "args": { + "External id": 508944,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144105990.920, "dur": 35.959, + "args": { + "External id": 508945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144106045.556, "dur": 55.645, + "args": { + "External id": 508946,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16982 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338711, "tid": 2338711, + "ts": 6274144106262.657, "dur": 80.049, + "args": { + "External id": 508947,"Record function id": 0, "Ev Idx": 16983 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144106416.383, "dur": 46.413, + "args": { + "External id": 508948,"Record function id": 0, "Ev Idx": 16984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274144106471.958, "dur": 29090.570, + "args": { + "External id": 508949,"Record function id": 0, "Ev Idx": 16985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274144106481.781, "dur": 917.023, + "args": { + "External id": 508950,"Record function id": 0, "Ev Idx": 16986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144106560.940, "dur": 9.547, + "args": { + "External id": 508951,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144106584.985, "dur": 60.207, + "args": { + "External id": 508952,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106607.766, "dur": 2.532, + "args": { + "External id": 508953,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106615.692, "dur": 0.472, + "args": { + "External id": 508954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106617.259, "dur": 0.680, + "args": { + "External id": 508955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106619.506, "dur": 0.565, + "args": { + "External id": 508956,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106622.830, "dur": 0.337, + "args": { + "External id": 508957,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106627.479, "dur": 0.571, + "args": { + "External id": 508958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106629.339, "dur": 5.136, + "args": { + "External id": 508959,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106636.201, "dur": 0.625, + "args": { + "External id": 508960,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106638.173, "dur": 0.538, + "args": { + "External id": 508961,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144106658.053, "dur": 59.849, + "args": { + "External id": 508962,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144106753.510, "dur": 126.489, + "args": { + "External id": 508963,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144106765.325, "dur": 5.114, + "args": { + "External id": 508964,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144106775.399, "dur": 11.087, + "args": { + "External id": 508965,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144106779.908, "dur": 6.184, + "args": { + "External id": 508966,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106784.249, "dur": 0.649, + "args": { + "External id": 508967,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144106793.956, "dur": 32.991, + "args": { + "External id": 508968,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106795.943, "dur": 3.069, + "args": { + "External id": 508969,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106800.368, "dur": 0.474, + "args": { + "External id": 508970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106802.199, "dur": 0.535, + "args": { + "External id": 508971,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106806.423, "dur": 2.862, + "args": { + "External id": 508972,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106810.426, "dur": 0.362, + "args": { + "External id": 508973,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106812.406, "dur": 0.347, + "args": { + "External id": 508974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106815.786, "dur": 0.451, + "args": { + "External id": 508975,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106817.984, "dur": 0.304, + "args": { + "External id": 508976,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144106819.171, "dur": 2.702, + "args": { + "External id": 508977,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144106839.328, "dur": 32.491, + "args": { + "External id": 508978,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144106934.153, "dur": 353.511, + "args": { + "External id": 508979,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144106969.307, "dur": 312.926, + "args": { + "External id": 508980,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17016, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144106980.055, "dur": 294.574, + "args": { + "External id": 508981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144107319.232, "dur": 2.274, + "args": { + "External id": 508982,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17018, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274144107419.476, "dur": 27959.202, + "args": { + "External id": 508983,"Record function id": 0, "Ev Idx": 17019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107522.875, "dur": 6.642, + "args": { + "External id": 508984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107532.838, "dur": 1.073, + "args": { + "External id": 508985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107535.556, "dur": 4.425, + "args": { + "External id": 508986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107541.591, "dur": 1.074, + "args": { + "External id": 508987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107543.950, "dur": 1.067, + "args": { + "External id": 508988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107546.445, "dur": 0.878, + "args": { + "External id": 508989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107551.056, "dur": 1.050, + "args": { + "External id": 508990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107553.538, "dur": 1.907, + "args": { + "External id": 508991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107556.564, "dur": 0.868, + "args": { + "External id": 508992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144107558.938, "dur": 0.675, + "args": { + "External id": 508993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144107582.860, "dur": 27753.965, + "args": { + "External id": 508994,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144107616.892, "dur": 27712.610, + "args": { + "External id": 508995,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144107633.696, "dur": 16.316, + "args": { + "External id": 508996,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144107653.891, "dur": 27641.090, + "args": { + "External id": 508997,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144107656.583, "dur": 27637.833, + "args": { + "External id": 508998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144107662.976, "dur": 7.646, + "args": { + "External id": 508999,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144107672.729, "dur": 27618.422, + "args": { + "External id": 509000,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144135506.102, "dur": 30.219, + "args": { + "External id": 509001,"Sequence number": 5516252, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17037 + } + }, + { + "ph": "s", "id": 435, "pid": 2338711, "tid": 2338711, "ts": 6274144135506.102, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144135522.715, "dur": 9.074, + "args": { + "External id": 509002,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144135526.948, "dur": 4.646, + "args": { + "External id": 509003,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144135617.527, "dur": 77.081, + "args": { + "External id": 509004,"Record function id": 0, "Ev Idx": 17040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144135696.077, "dur": 1144.119, + "args": { + "External id": 509005,"Record function id": 0, "Ev Idx": 17041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144135737.038, "dur": 1088.780, + "args": { + "External id": 509006,"Sequence number": 5516253, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17042 + } + }, + { + "ph": "s", "id": 434, "pid": 2338711, "tid": 2338711, "ts": 6274144135737.038, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144135809.470, "dur": 50.967, + "args": { + "External id": 509007,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144135873.363, "dur": 105.279, + "args": { + "External id": 509008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144135990.067, "dur": 37.055, + "args": { + "External id": 509009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144136035.536, "dur": 66.550, + "args": { + "External id": 509010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144136135.616, "dur": 27.601, + "args": { + "External id": 509011,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144136183.632, "dur": 19.129, + "args": { + "External id": 509012,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144136226.355, "dur": 133.834, + "args": { + "External id": 509013,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144136277.909, "dur": 11.785, + "args": { + "External id": 509014,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144136283.610, "dur": 5.009, + "args": { + "External id": 509015,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144136292.415, "dur": 4.267, + "args": { + "External id": 509016,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144136297.912, "dur": 0.890, + "args": { + "External id": 509017,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144136301.477, "dur": 5.977, + "args": { + "External id": 509018,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144136371.164, "dur": 57.675, + "args": { + "External id": 509019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144136465.888, "dur": 29.520, + "args": { + "External id": 509020,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144136504.567, "dur": 42.777, + "args": { + "External id": 509021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144136557.015, "dur": 52.650, + "args": { + "External id": 509022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144136635.542, "dur": 30.188, + "args": { + "External id": 509023,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144136674.346, "dur": 38.538, + "args": { + "External id": 509024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144136732.361, "dur": 18.445, + "args": { + "External id": 509025,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17061 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338711, "tid": 2338711, + "ts": 6274144136910.254, "dur": 81.339, + "args": { + "External id": 509026,"Record function id": 0, "Ev Idx": 17062 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144137106.002, "dur": 50.999, + "args": { + "External id": 509027,"Record function id": 0, "Ev Idx": 17063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274144137166.747, "dur": 30253.367, + "args": { + "External id": 509028,"Record function id": 0, "Ev Idx": 17064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274144137174.620, "dur": 995.570, + "args": { + "External id": 509029,"Record function id": 0, "Ev Idx": 17065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144137260.675, "dur": 9.824, + "args": { + "External id": 509030,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144137284.347, "dur": 43.817, + "args": { + "External id": 509031,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137289.808, "dur": 2.488, + "args": { + "External id": 509032,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137300.505, "dur": 0.525, + "args": { + "External id": 509033,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137302.284, "dur": 0.632, + "args": { + "External id": 509034,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137304.082, "dur": 0.468, + "args": { + "External id": 509035,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137308.183, "dur": 0.477, + "args": { + "External id": 509036,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137309.955, "dur": 0.391, + "args": { + "External id": 509037,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137311.640, "dur": 5.589, + "args": { + "External id": 509038,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137318.978, "dur": 0.415, + "args": { + "External id": 509039,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137320.681, "dur": 0.316, + "args": { + "External id": 509040,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144137339.664, "dur": 55.487, + "args": { + "External id": 509041,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144137437.724, "dur": 144.699, + "args": { + "External id": 509042,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144137468.191, "dur": 5.439, + "args": { + "External id": 509043,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144137479.160, "dur": 10.578, + "args": { + "External id": 509044,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144137483.743, "dur": 5.583, + "args": { + "External id": 509045,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137487.362, "dur": 0.639, + "args": { + "External id": 509046,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144137496.672, "dur": 29.572, + "args": { + "External id": 509047,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137498.860, "dur": 0.490, + "args": { + "External id": 509048,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137501.125, "dur": 2.477, + "args": { + "External id": 509049,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137504.664, "dur": 0.358, + "args": { + "External id": 509050,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137506.418, "dur": 2.700, + "args": { + "External id": 509051,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137512.189, "dur": 0.349, + "args": { + "External id": 509052,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137513.960, "dur": 0.335, + "args": { + "External id": 509053,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137515.592, "dur": 0.345, + "args": { + "External id": 509054,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137520.145, "dur": 0.271, + "args": { + "External id": 509055,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144137521.851, "dur": 0.276, + "args": { + "External id": 509056,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144137541.111, "dur": 33.316, + "args": { + "External id": 509057,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144137659.085, "dur": 371.423, + "args": { + "External id": 509058,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144137690.214, "dur": 335.261, + "args": { + "External id": 509059,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17095, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144137700.836, "dur": 315.993, + "args": { + "External id": 509060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144138052.153, "dur": 37.210, + "args": { + "External id": 509061,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17097, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274144138192.231, "dur": 29036.994, + "args": { + "External id": 509062,"Record function id": 0, "Ev Idx": 17098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138294.436, "dur": 7.092, + "args": { + "External id": 509063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138305.268, "dur": 0.910, + "args": { + "External id": 509064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138308.036, "dur": 3.007, + "args": { + "External id": 509065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138312.610, "dur": 0.802, + "args": { + "External id": 509066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138314.759, "dur": 0.829, + "args": { + "External id": 509067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138316.821, "dur": 1.042, + "args": { + "External id": 509068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138321.604, "dur": 0.837, + "args": { + "External id": 509069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138324.274, "dur": 2.357, + "args": { + "External id": 509070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138327.879, "dur": 0.699, + "args": { + "External id": 509071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144138330.204, "dur": 0.854, + "args": { + "External id": 509072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144138352.184, "dur": 28834.440, + "args": { + "External id": 509073,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144138367.791, "dur": 28810.850, + "args": { + "External id": 509074,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144138382.895, "dur": 16.101, + "args": { + "External id": 509075,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144138402.697, "dur": 28740.535, + "args": { + "External id": 509076,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144138405.725, "dur": 28736.978, + "args": { + "External id": 509077,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144138411.797, "dur": 6.943, + "args": { + "External id": 509078,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144138420.388, "dur": 28719.209, + "args": { + "External id": 509079,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144167363.175, "dur": 30.970, + "args": { + "External id": 509080,"Sequence number": 5516254, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17116 + } + }, + { + "ph": "s", "id": 433, "pid": 2338711, "tid": 2338711, "ts": 6274144167363.175, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144167380.679, "dur": 8.865, + "args": { + "External id": 509081,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144167384.950, "dur": 4.384, + "args": { + "External id": 509082,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144167461.050, "dur": 72.749, + "args": { + "External id": 509083,"Record function id": 0, "Ev Idx": 17119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144167535.132, "dur": 1167.622, + "args": { + "External id": 509084,"Record function id": 0, "Ev Idx": 17120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144167573.853, "dur": 1115.206, + "args": { + "External id": 509085,"Sequence number": 5516255, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17121 + } + }, + { + "ph": "s", "id": 432, "pid": 2338711, "tid": 2338711, "ts": 6274144167573.853, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144167656.252, "dur": 60.019, + "args": { + "External id": 509086,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144167729.588, "dur": 105.957, + "args": { + "External id": 509087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144167847.361, "dur": 36.588, + "args": { + "External id": 509088,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144167893.343, "dur": 28.896, + "args": { + "External id": 509089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144167948.622, "dur": 25.068, + "args": { + "External id": 509090,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144167996.908, "dur": 16.968, + "args": { + "External id": 509091,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144168040.215, "dur": 186.647, + "args": { + "External id": 509092,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144168133.494, "dur": 12.270, + "args": { + "External id": 509093,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144168138.933, "dur": 5.944, + "args": { + "External id": 509094,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144168148.704, "dur": 4.226, + "args": { + "External id": 509095,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144168154.304, "dur": 1.235, + "args": { + "External id": 509096,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144168160.532, "dur": 7.153, + "args": { + "External id": 509097,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144168238.576, "dur": 57.582, + "args": { + "External id": 509098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144168329.437, "dur": 31.210, + "args": { + "External id": 509099,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144168371.206, "dur": 45.231, + "args": { + "External id": 509100,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144168427.034, "dur": 35.366, + "args": { + "External id": 509101,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144168490.640, "dur": 28.697, + "args": { + "External id": 509102,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144168524.898, "dur": 34.714, + "args": { + "External id": 509103,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144168581.592, "dur": 39.018, + "args": { + "External id": 509104,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17140 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338711, "tid": 2338711, + "ts": 6274144168771.042, "dur": 77.447, + "args": { + "External id": 509105,"Record function id": 0, "Ev Idx": 17141 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144168923.790, "dur": 47.924, + "args": { + "External id": 509106,"Record function id": 0, "Ev Idx": 17142 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274144168980.311, "dur": 29905.636, + "args": { + "External id": 509107,"Record function id": 0, "Ev Idx": 17143 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274144168989.417, "dur": 967.757, + "args": { + "External id": 509108,"Record function id": 0, "Ev Idx": 17144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144169107.049, "dur": 10.012, + "args": { + "External id": 509109,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144169132.285, "dur": 37.861, + "args": { + "External id": 509110,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169138.121, "dur": 2.337, + "args": { + "External id": 509111,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169145.004, "dur": 0.388, + "args": { + "External id": 509112,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169146.427, "dur": 0.581, + "args": { + "External id": 509113,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169148.596, "dur": 0.575, + "args": { + "External id": 509114,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169151.992, "dur": 0.673, + "args": { + "External id": 509115,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169154.212, "dur": 0.694, + "args": { + "External id": 509116,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169155.842, "dur": 4.319, + "args": { + "External id": 509117,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169161.913, "dur": 0.327, + "args": { + "External id": 509118,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169163.522, "dur": 0.385, + "args": { + "External id": 509119,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144169181.713, "dur": 54.691, + "args": { + "External id": 509120,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144169271.969, "dur": 120.778, + "args": { + "External id": 509121,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144169283.260, "dur": 3.980, + "args": { + "External id": 509122,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144169292.086, "dur": 12.943, + "args": { + "External id": 509123,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144169298.850, "dur": 5.766, + "args": { + "External id": 509124,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169302.795, "dur": 0.737, + "args": { + "External id": 509125,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144169312.255, "dur": 31.032, + "args": { + "External id": 509126,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169314.221, "dur": 2.627, + "args": { + "External id": 509127,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169318.590, "dur": 0.457, + "args": { + "External id": 509128,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169320.229, "dur": 0.332, + "args": { + "External id": 509129,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169323.991, "dur": 2.511, + "args": { + "External id": 509130,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169328.058, "dur": 0.426, + "args": { + "External id": 509131,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169330.326, "dur": 0.277, + "args": { + "External id": 509132,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169333.483, "dur": 0.298, + "args": { + "External id": 509133,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169335.160, "dur": 0.273, + "args": { + "External id": 509134,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144169336.637, "dur": 2.465, + "args": { + "External id": 509135,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144169353.447, "dur": 31.357, + "args": { + "External id": 509136,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144169448.065, "dur": 399.673, + "args": { + "External id": 509137,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144169479.541, "dur": 362.143, + "args": { + "External id": 509138,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17174, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144169490.011, "dur": 345.473, + "args": { + "External id": 509139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144169876.197, "dur": 2.570, + "args": { + "External id": 509140,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17176, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274144169978.398, "dur": 28719.045, + "args": { + "External id": 509141,"Record function id": 0, "Ev Idx": 17177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170128.341, "dur": 7.441, + "args": { + "External id": 509142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170139.459, "dur": 1.081, + "args": { + "External id": 509143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170142.233, "dur": 3.467, + "args": { + "External id": 509144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170147.433, "dur": 0.806, + "args": { + "External id": 509145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170149.620, "dur": 1.046, + "args": { + "External id": 509146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170151.767, "dur": 0.940, + "args": { + "External id": 509147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170156.481, "dur": 0.953, + "args": { + "External id": 509148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170158.797, "dur": 1.986, + "args": { + "External id": 509149,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170162.123, "dur": 0.817, + "args": { + "External id": 509150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144170164.341, "dur": 0.692, + "args": { + "External id": 509151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144170187.074, "dur": 28466.153, + "args": { + "External id": 509152,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144170203.059, "dur": 28442.267, + "args": { + "External id": 509153,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144170218.410, "dur": 16.145, + "args": { + "External id": 509154,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144170238.242, "dur": 28369.210, + "args": { + "External id": 509155,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144170241.040, "dur": 28365.784, + "args": { + "External id": 509156,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144170246.966, "dur": 5.493, + "args": { + "External id": 509157,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144170254.130, "dur": 28349.505, + "args": { + "External id": 509158,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144198831.300, "dur": 29.891, + "args": { + "External id": 509159,"Sequence number": 5516256, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17195 + } + }, + { + "ph": "s", "id": 431, "pid": 2338711, "tid": 2338711, "ts": 6274144198831.300, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144198847.797, "dur": 8.546, + "args": { + "External id": 509160,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144198851.839, "dur": 4.367, + "args": { + "External id": 509161,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144198926.806, "dur": 75.594, + "args": { + "External id": 509162,"Record function id": 0, "Ev Idx": 17198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144199003.980, "dur": 1152.527, + "args": { + "External id": 509163,"Record function id": 0, "Ev Idx": 17199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144199046.413, "dur": 1094.818, + "args": { + "External id": 509164,"Sequence number": 5516257, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17200 + } + }, + { + "ph": "s", "id": 430, "pid": 2338711, "tid": 2338711, "ts": 6274144199046.413, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144199147.860, "dur": 50.124, + "args": { + "External id": 509165,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144199211.607, "dur": 104.916, + "args": { + "External id": 509166,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144199329.940, "dur": 37.149, + "args": { + "External id": 509167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144199376.212, "dur": 29.646, + "args": { + "External id": 509168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144199430.956, "dur": 23.092, + "args": { + "External id": 509169,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144199473.476, "dur": 16.182, + "args": { + "External id": 509170,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144199512.738, "dur": 156.445, + "args": { + "External id": 509171,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144199563.580, "dur": 11.191, + "args": { + "External id": 509172,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144199568.967, "dur": 4.914, + "args": { + "External id": 509173,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144199577.347, "dur": 4.429, + "args": { + "External id": 509174,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144199582.912, "dur": 1.262, + "args": { + "External id": 509175,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144199603.783, "dur": 6.806, + "args": { + "External id": 509176,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144199683.983, "dur": 51.621, + "args": { + "External id": 509177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144199771.922, "dur": 31.548, + "args": { + "External id": 509178,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144199813.097, "dur": 41.716, + "args": { + "External id": 509179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144199864.168, "dur": 38.745, + "args": { + "External id": 509180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144199929.895, "dur": 26.185, + "args": { + "External id": 509181,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144199963.599, "dur": 35.647, + "args": { + "External id": 509182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144200016.778, "dur": 18.978, + "args": { + "External id": 509183,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338711, "tid": 2338711, + "ts": 6274144200224.876, "dur": 80.998, + "args": { + "External id": 509184,"Record function id": 0, "Ev Idx": 17220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144200383.304, "dur": 46.217, + "args": { + "External id": 509185,"Record function id": 0, "Ev Idx": 17221 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274144200438.110, "dur": 29492.091, + "args": { + "External id": 509186,"Record function id": 0, "Ev Idx": 17222 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274144200446.645, "dur": 963.855, + "args": { + "External id": 509187,"Record function id": 0, "Ev Idx": 17223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144200521.771, "dur": 9.307, + "args": { + "External id": 509188,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144200544.747, "dur": 37.864, + "args": { + "External id": 509189,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200550.233, "dur": 2.324, + "args": { + "External id": 509190,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200557.250, "dur": 0.409, + "args": { + "External id": 509191,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200558.883, "dur": 0.439, + "args": { + "External id": 509192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200561.167, "dur": 0.523, + "args": { + "External id": 509193,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200564.561, "dur": 0.509, + "args": { + "External id": 509194,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200566.382, "dur": 0.452, + "args": { + "External id": 509195,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200568.210, "dur": 4.665, + "args": { + "External id": 509196,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200574.565, "dur": 0.447, + "args": { + "External id": 509197,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200576.524, "dur": 0.260, + "args": { + "External id": 509198,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144200614.906, "dur": 59.038, + "args": { + "External id": 509199,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144200710.805, "dur": 125.304, + "args": { + "External id": 509200,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144200722.025, "dur": 4.913, + "args": { + "External id": 509201,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144200732.051, "dur": 13.978, + "args": { + "External id": 509202,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144200739.169, "dur": 6.441, + "args": { + "External id": 509203,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200743.105, "dur": 0.857, + "args": { + "External id": 509204,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144200753.011, "dur": 28.886, + "args": { + "External id": 509205,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200755.019, "dur": 2.707, + "args": { + "External id": 509206,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200759.124, "dur": 0.413, + "args": { + "External id": 509207,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200761.172, "dur": 0.400, + "args": { + "External id": 509208,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200765.289, "dur": 2.614, + "args": { + "External id": 509209,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200768.982, "dur": 0.426, + "args": { + "External id": 509210,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200770.557, "dur": 0.372, + "args": { + "External id": 509211,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200772.215, "dur": 0.416, + "args": { + "External id": 509212,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200773.859, "dur": 0.353, + "args": { + "External id": 509213,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144200775.349, "dur": 2.522, + "args": { + "External id": 509214,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144200794.763, "dur": 33.646, + "args": { + "External id": 509215,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144200889.584, "dur": 409.806, + "args": { + "External id": 509216,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144200920.366, "dur": 373.237, + "args": { + "External id": 509217,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17253, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144200930.146, "dur": 356.911, + "args": { + "External id": 509218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144201325.556, "dur": 2.669, + "args": { + "External id": 509219,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17255, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274144201432.179, "dur": 28303.759, + "args": { + "External id": 509220,"Record function id": 0, "Ev Idx": 17256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201530.034, "dur": 6.424, + "args": { + "External id": 509221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201539.505, "dur": 1.233, + "args": { + "External id": 509222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201542.298, "dur": 2.862, + "args": { + "External id": 509223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201546.778, "dur": 1.073, + "args": { + "External id": 509224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201549.437, "dur": 0.977, + "args": { + "External id": 509225,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201551.745, "dur": 0.907, + "args": { + "External id": 509226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201556.017, "dur": 0.772, + "args": { + "External id": 509227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201558.431, "dur": 2.069, + "args": { + "External id": 509228,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201562.017, "dur": 0.672, + "args": { + "External id": 509229,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144201563.983, "dur": 0.862, + "args": { + "External id": 509230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144201584.650, "dur": 28104.382, + "args": { + "External id": 509231,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144201622.872, "dur": 28057.841, + "args": { + "External id": 509232,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144201638.743, "dur": 16.680, + "args": { + "External id": 509233,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144201662.586, "dur": 27982.726, + "args": { + "External id": 509234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144201665.263, "dur": 27979.394, + "args": { + "External id": 509235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144201671.639, "dur": 5.714, + "args": { + "External id": 509236,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144201678.994, "dur": 27962.653, + "args": { + "External id": 509237,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144229870.597, "dur": 34.408, + "args": { + "External id": 509238,"Sequence number": 5516258, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17274 + } + }, + { + "ph": "s", "id": 429, "pid": 2338711, "tid": 2338711, "ts": 6274144229870.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144229891.569, "dur": 8.552, + "args": { + "External id": 509239,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144229895.609, "dur": 4.252, + "args": { + "External id": 509240,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144229975.630, "dur": 73.316, + "args": { + "External id": 509241,"Record function id": 0, "Ev Idx": 17277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144230050.100, "dur": 1175.871, + "args": { + "External id": 509242,"Record function id": 0, "Ev Idx": 17278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144230120.834, "dur": 1090.999, + "args": { + "External id": 509243,"Sequence number": 5516259, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17279 + } + }, + { + "ph": "s", "id": 428, "pid": 2338711, "tid": 2338711, "ts": 6274144230120.834, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144230193.348, "dur": 51.999, + "args": { + "External id": 509244,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144230258.346, "dur": 105.614, + "args": { + "External id": 509245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144230376.249, "dur": 42.306, + "args": { + "External id": 509246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144230427.307, "dur": 29.413, + "args": { + "External id": 509247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144230483.017, "dur": 27.765, + "args": { + "External id": 509248,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144230530.459, "dur": 21.203, + "args": { + "External id": 509249,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144230578.476, "dur": 159.195, + "args": { + "External id": 509250,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144230651.877, "dur": 12.083, + "args": { + "External id": 509251,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144230656.881, "dur": 6.193, + "args": { + "External id": 509252,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144230666.852, "dur": 4.198, + "args": { + "External id": 509253,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144230672.190, "dur": 0.842, + "args": { + "External id": 509254,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144230675.379, "dur": 5.769, + "args": { + "External id": 509255,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144230751.669, "dur": 52.253, + "args": { + "External id": 509256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144230839.626, "dur": 32.045, + "args": { + "External id": 509257,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144230881.924, "dur": 45.728, + "args": { + "External id": 509258,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144230936.618, "dur": 35.080, + "args": { + "External id": 509259,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144230994.415, "dur": 25.613, + "args": { + "External id": 509260,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144231025.705, "dur": 67.560, + "args": { + "External id": 509261,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144231119.527, "dur": 20.710, + "args": { + "External id": 509262,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338711, "tid": 2338711, + "ts": 6274144231293.670, "dur": 80.512, + "args": { + "External id": 509263,"Record function id": 0, "Ev Idx": 17299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144231448.013, "dur": 46.312, + "args": { + "External id": 509264,"Record function id": 0, "Ev Idx": 17300 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274144231502.961, "dur": 31586.509, + "args": { + "External id": 509265,"Record function id": 0, "Ev Idx": 17301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274144231510.752, "dur": 1012.909, + "args": { + "External id": 509266,"Record function id": 0, "Ev Idx": 17302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144231607.552, "dur": 10.210, + "args": { + "External id": 509267,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144231633.503, "dur": 38.557, + "args": { + "External id": 509268,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231639.416, "dur": 2.307, + "args": { + "External id": 509269,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231646.821, "dur": 0.473, + "args": { + "External id": 509270,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231648.641, "dur": 0.580, + "args": { + "External id": 509271,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231650.506, "dur": 0.373, + "args": { + "External id": 509272,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231653.724, "dur": 0.512, + "args": { + "External id": 509273,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231655.896, "dur": 0.397, + "args": { + "External id": 509274,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231657.295, "dur": 4.567, + "args": { + "External id": 509275,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231663.639, "dur": 0.438, + "args": { + "External id": 509276,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231665.051, "dur": 0.391, + "args": { + "External id": 509277,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144231684.624, "dur": 59.322, + "args": { + "External id": 509278,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144231778.243, "dur": 128.605, + "args": { + "External id": 509279,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144231789.747, "dur": 3.989, + "args": { + "External id": 509280,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144231799.002, "dur": 15.957, + "args": { + "External id": 509281,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144231809.058, "dur": 5.509, + "args": { + "External id": 509282,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231812.665, "dur": 0.665, + "args": { + "External id": 509283,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144231821.803, "dur": 30.933, + "args": { + "External id": 509284,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231823.894, "dur": 2.188, + "args": { + "External id": 509285,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231827.895, "dur": 0.439, + "args": { + "External id": 509286,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231829.500, "dur": 0.283, + "args": { + "External id": 509287,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231833.641, "dur": 2.507, + "args": { + "External id": 509288,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231837.228, "dur": 0.264, + "args": { + "External id": 509289,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231839.317, "dur": 0.595, + "args": { + "External id": 509290,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231842.646, "dur": 0.295, + "args": { + "External id": 509291,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231844.457, "dur": 0.590, + "args": { + "External id": 509292,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144231846.295, "dur": 2.386, + "args": { + "External id": 509293,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144231866.370, "dur": 32.493, + "args": { + "External id": 509294,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144231962.778, "dur": 451.450, + "args": { + "External id": 509295,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144231993.686, "dur": 414.428, + "args": { + "External id": 509296,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17332, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144232004.104, "dur": 396.849, + "args": { + "External id": 509297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144232444.265, "dur": 2.599, + "args": { + "External id": 509298,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17334, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274144232544.591, "dur": 30325.738, + "args": { + "External id": 509299,"Record function id": 0, "Ev Idx": 17335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232672.047, "dur": 7.401, + "args": { + "External id": 509300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232683.220, "dur": 1.072, + "args": { + "External id": 509301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232685.994, "dur": 3.187, + "args": { + "External id": 509302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232691.194, "dur": 0.842, + "args": { + "External id": 509303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232693.298, "dur": 0.737, + "args": { + "External id": 509304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232695.295, "dur": 0.883, + "args": { + "External id": 509305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232699.680, "dur": 1.394, + "args": { + "External id": 509306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232702.876, "dur": 2.562, + "args": { + "External id": 509307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232706.677, "dur": 0.920, + "args": { + "External id": 509308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144232709.115, "dur": 0.753, + "args": { + "External id": 509309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144232734.685, "dur": 30088.571, + "args": { + "External id": 509310,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144232750.831, "dur": 30063.484, + "args": { + "External id": 509311,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144232766.331, "dur": 21.490, + "args": { + "External id": 509312,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144232791.600, "dur": 29986.398, + "args": { + "External id": 509313,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144232794.324, "dur": 29982.979, + "args": { + "External id": 509314,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144232804.422, "dur": 6.330, + "args": { + "External id": 509315,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144232812.395, "dur": 29961.683, + "args": { + "External id": 509316,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144263006.285, "dur": 32.048, + "args": { + "External id": 509317,"Sequence number": 5516260, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17353 + } + }, + { + "ph": "s", "id": 427, "pid": 2338711, "tid": 2338711, "ts": 6274144263006.285, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144263024.951, "dur": 8.773, + "args": { + "External id": 509318,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144263029.031, "dur": 4.439, + "args": { + "External id": 509319,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144263130.206, "dur": 71.784, + "args": { + "External id": 509320,"Record function id": 0, "Ev Idx": 17356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144263203.702, "dur": 1143.664, + "args": { + "External id": 509321,"Record function id": 0, "Ev Idx": 17357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144263243.699, "dur": 1088.935, + "args": { + "External id": 509322,"Sequence number": 5516261, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17358 + } + }, + { + "ph": "s", "id": 426, "pid": 2338711, "tid": 2338711, "ts": 6274144263243.699, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144263311.634, "dur": 49.911, + "args": { + "External id": 509323,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144263374.016, "dur": 108.732, + "args": { + "External id": 509324,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144263494.908, "dur": 38.292, + "args": { + "External id": 509325,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144263541.517, "dur": 29.800, + "args": { + "External id": 509326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144263613.996, "dur": 30.411, + "args": { + "External id": 509327,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144263667.828, "dur": 18.103, + "args": { + "External id": 509328,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144263710.056, "dur": 129.519, + "args": { + "External id": 509329,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144263760.234, "dur": 11.550, + "args": { + "External id": 509330,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144263765.839, "dur": 4.931, + "args": { + "External id": 509331,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144263774.823, "dur": 4.302, + "args": { + "External id": 509332,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144263780.408, "dur": 0.841, + "args": { + "External id": 509333,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144263783.625, "dur": 5.026, + "args": { + "External id": 509334,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144263850.130, "dur": 57.553, + "args": { + "External id": 509335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144263944.048, "dur": 29.325, + "args": { + "External id": 509336,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144263983.192, "dur": 43.623, + "args": { + "External id": 509337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144264035.827, "dur": 71.921, + "args": { + "External id": 509338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144264139.764, "dur": 25.559, + "args": { + "External id": 509339,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144264174.272, "dur": 43.130, + "args": { + "External id": 509340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144264236.168, "dur": 20.983, + "args": { + "External id": 509341,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338711, "tid": 2338711, + "ts": 6274144264414.038, "dur": 78.832, + "args": { + "External id": 509342,"Record function id": 0, "Ev Idx": 17378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144264568.448, "dur": 65.916, + "args": { + "External id": 509343,"Record function id": 0, "Ev Idx": 17379 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274144264644.919, "dur": 30313.010, + "args": { + "External id": 509344,"Record function id": 0, "Ev Idx": 17380 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274144264653.965, "dur": 925.227, + "args": { + "External id": 509345,"Record function id": 0, "Ev Idx": 17381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144264735.718, "dur": 10.208, + "args": { + "External id": 509346,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144264759.911, "dur": 39.347, + "args": { + "External id": 509347,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264766.220, "dur": 2.422, + "args": { + "External id": 509348,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264772.984, "dur": 0.414, + "args": { + "External id": 509349,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264774.790, "dur": 0.565, + "args": { + "External id": 509350,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264776.982, "dur": 0.580, + "args": { + "External id": 509351,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264782.028, "dur": 0.527, + "args": { + "External id": 509352,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264784.040, "dur": 0.460, + "args": { + "External id": 509353,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264785.812, "dur": 3.434, + "args": { + "External id": 509354,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264790.451, "dur": 0.574, + "args": { + "External id": 509355,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264792.273, "dur": 0.417, + "args": { + "External id": 509356,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144264810.712, "dur": 54.293, + "args": { + "External id": 509357,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144264898.809, "dur": 121.361, + "args": { + "External id": 509358,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144264910.217, "dur": 4.044, + "args": { + "External id": 509359,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144264919.145, "dur": 12.701, + "args": { + "External id": 509360,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144264926.167, "dur": 5.284, + "args": { + "External id": 509361,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264929.550, "dur": 0.663, + "args": { + "External id": 509362,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144264938.458, "dur": 31.627, + "args": { + "External id": 509363,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264940.672, "dur": 2.910, + "args": { + "External id": 509364,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264944.611, "dur": 0.591, + "args": { + "External id": 509365,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264946.848, "dur": 0.471, + "args": { + "External id": 509366,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264950.842, "dur": 2.386, + "args": { + "External id": 509367,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264954.466, "dur": 0.440, + "args": { + "External id": 509368,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264956.462, "dur": 0.526, + "args": { + "External id": 509369,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264960.633, "dur": 0.516, + "args": { + "External id": 509370,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264962.302, "dur": 0.350, + "args": { + "External id": 509371,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144264963.876, "dur": 2.313, + "args": { + "External id": 509372,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144264982.047, "dur": 30.565, + "args": { + "External id": 509373,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144265113.898, "dur": 366.325, + "args": { + "External id": 509374,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144265147.435, "dur": 327.603, + "args": { + "External id": 509375,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17411, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144265158.632, "dur": 310.966, + "args": { + "External id": 509376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144265503.591, "dur": 2.487, + "args": { + "External id": 509377,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17413, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274144265619.600, "dur": 29144.337, + "args": { + "External id": 509378,"Record function id": 0, "Ev Idx": 17414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265719.995, "dur": 6.752, + "args": { + "External id": 509379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265730.287, "dur": 1.056, + "args": { + "External id": 509380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265733.150, "dur": 2.925, + "args": { + "External id": 509381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265737.788, "dur": 0.699, + "args": { + "External id": 509382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265739.817, "dur": 0.814, + "args": { + "External id": 509383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265741.828, "dur": 0.796, + "args": { + "External id": 509384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265746.056, "dur": 0.951, + "args": { + "External id": 509385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265748.697, "dur": 2.213, + "args": { + "External id": 509386,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265752.192, "dur": 0.927, + "args": { + "External id": 509387,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144265754.679, "dur": 0.631, + "args": { + "External id": 509388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144265775.153, "dur": 28944.281, + "args": { + "External id": 509389,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144265790.967, "dur": 28920.244, + "args": { + "External id": 509390,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144265805.797, "dur": 15.535, + "args": { + "External id": 509391,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144265825.242, "dur": 28852.043, + "args": { + "External id": 509392,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144265827.915, "dur": 28848.628, + "args": { + "External id": 509393,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144265834.167, "dur": 5.803, + "args": { + "External id": 509394,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144265841.545, "dur": 28831.640, + "args": { + "External id": 509395,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144294901.240, "dur": 30.900, + "args": { + "External id": 509396,"Sequence number": 5516262, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17432 + } + }, + { + "ph": "s", "id": 425, "pid": 2338711, "tid": 2338711, "ts": 6274144294901.240, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144294918.409, "dur": 8.890, + "args": { + "External id": 509397,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144294922.460, "dur": 4.590, + "args": { + "External id": 509398,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144294999.697, "dur": 102.630, + "args": { + "External id": 509399,"Record function id": 0, "Ev Idx": 17435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144295105.512, "dur": 1158.981, + "args": { + "External id": 509400,"Record function id": 0, "Ev Idx": 17436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144295147.688, "dur": 1102.971, + "args": { + "External id": 509401,"Sequence number": 5516263, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17437 + } + }, + { + "ph": "s", "id": 424, "pid": 2338711, "tid": 2338711, "ts": 6274144295147.688, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144295216.482, "dur": 50.367, + "args": { + "External id": 509402,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144295279.328, "dur": 105.802, + "args": { + "External id": 509403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144295398.097, "dur": 39.048, + "args": { + "External id": 509404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144295446.362, "dur": 29.944, + "args": { + "External id": 509405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144295509.396, "dur": 29.118, + "args": { + "External id": 509406,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144295559.540, "dur": 17.726, + "args": { + "External id": 509407,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144295625.493, "dur": 136.993, + "args": { + "External id": 509408,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144295677.518, "dur": 12.328, + "args": { + "External id": 509409,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144295683.230, "dur": 5.675, + "args": { + "External id": 509410,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144295692.699, "dur": 3.774, + "args": { + "External id": 509411,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144295698.127, "dur": 1.206, + "args": { + "External id": 509412,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144295701.877, "dur": 7.404, + "args": { + "External id": 509413,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144295777.512, "dur": 54.887, + "args": { + "External id": 509414,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144295865.336, "dur": 34.005, + "args": { + "External id": 509415,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144295911.292, "dur": 43.105, + "args": { + "External id": 509416,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144295961.083, "dur": 35.058, + "args": { + "External id": 509417,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144296017.421, "dur": 23.381, + "args": { + "External id": 509418,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144296051.117, "dur": 77.492, + "args": { + "External id": 509419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144296153.350, "dur": 24.635, + "args": { + "External id": 509420,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338711, "tid": 2338711, + "ts": 6274144296335.546, "dur": 78.932, + "args": { + "External id": 509421,"Record function id": 0, "Ev Idx": 17457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144296488.109, "dur": 45.237, + "args": { + "External id": 509422,"Record function id": 0, "Ev Idx": 17458 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274144296542.247, "dur": 31305.595, + "args": { + "External id": 509423,"Record function id": 0, "Ev Idx": 17459 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274144296549.689, "dur": 992.709, + "args": { + "External id": 509424,"Record function id": 0, "Ev Idx": 17460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144296650.274, "dur": 9.895, + "args": { + "External id": 509425,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144296673.687, "dur": 38.761, + "args": { + "External id": 509426,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296679.775, "dur": 2.371, + "args": { + "External id": 509427,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296687.349, "dur": 0.658, + "args": { + "External id": 509428,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296689.559, "dur": 0.308, + "args": { + "External id": 509429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296691.263, "dur": 0.464, + "args": { + "External id": 509430,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296695.508, "dur": 0.392, + "args": { + "External id": 509431,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296697.532, "dur": 0.503, + "args": { + "External id": 509432,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296699.282, "dur": 3.533, + "args": { + "External id": 509433,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296703.950, "dur": 0.439, + "args": { + "External id": 509434,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296705.780, "dur": 0.298, + "args": { + "External id": 509435,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144296723.382, "dur": 56.534, + "args": { + "External id": 509436,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144296814.188, "dur": 150.627, + "args": { + "External id": 509437,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144296824.645, "dur": 4.509, + "args": { + "External id": 509438,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144296834.254, "dur": 38.768, + "args": { + "External id": 509439,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144296841.155, "dur": 31.407, + "args": { + "External id": 509440,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296870.587, "dur": 0.551, + "args": { + "External id": 509441,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144296881.896, "dur": 30.971, + "args": { + "External id": 509442,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296883.614, "dur": 2.598, + "args": { + "External id": 509443,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296887.527, "dur": 0.316, + "args": { + "External id": 509444,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296889.122, "dur": 0.387, + "args": { + "External id": 509445,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296893.414, "dur": 2.595, + "args": { + "External id": 509446,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296897.125, "dur": 0.355, + "args": { + "External id": 509447,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296899.090, "dur": 0.446, + "args": { + "External id": 509448,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296902.394, "dur": 0.363, + "args": { + "External id": 509449,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296903.789, "dur": 0.328, + "args": { + "External id": 509450,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144296905.521, "dur": 2.698, + "args": { + "External id": 509451,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144296923.212, "dur": 33.587, + "args": { + "External id": 509452,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144297017.680, "dur": 410.943, + "args": { + "External id": 509453,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144297048.483, "dur": 374.268, + "args": { + "External id": 509454,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17490, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144297096.358, "dur": 321.190, + "args": { + "External id": 509455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144297455.246, "dur": 2.529, + "args": { + "External id": 509456,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17492, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274144297563.674, "dur": 30096.574, + "args": { + "External id": 509457,"Record function id": 0, "Ev Idx": 17493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297689.384, "dur": 6.893, + "args": { + "External id": 509458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297700.096, "dur": 1.095, + "args": { + "External id": 509459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297703.369, "dur": 3.020, + "args": { + "External id": 509460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297708.072, "dur": 0.701, + "args": { + "External id": 509461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297710.117, "dur": 1.006, + "args": { + "External id": 509462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297712.485, "dur": 0.910, + "args": { + "External id": 509463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297716.981, "dur": 0.971, + "args": { + "External id": 509464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297719.734, "dur": 2.338, + "args": { + "External id": 509465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297723.225, "dur": 0.754, + "args": { + "External id": 509466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144297725.538, "dur": 0.490, + "args": { + "External id": 509467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144297746.542, "dur": 29870.035, + "args": { + "External id": 509468,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144297762.455, "dur": 29845.761, + "args": { + "External id": 509469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144297779.226, "dur": 16.022, + "args": { + "External id": 509470,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144297799.196, "dur": 29758.220, + "args": { + "External id": 509471,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144297801.944, "dur": 29754.906, + "args": { + "External id": 509472,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144297808.938, "dur": 5.620, + "args": { + "External id": 509473,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144297816.681, "dur": 29737.000, + "args": { + "External id": 509474,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144327793.259, "dur": 29.553, + "args": { + "External id": 509475,"Sequence number": 5516264, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17511 + } + }, + { + "ph": "s", "id": 423, "pid": 2338711, "tid": 2338711, "ts": 6274144327793.259, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144327810.044, "dur": 8.315, + "args": { + "External id": 509476,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144327814.223, "dur": 3.974, + "args": { + "External id": 509477,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144327888.214, "dur": 70.412, + "args": { + "External id": 509478,"Record function id": 0, "Ev Idx": 17514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144327959.548, "dur": 1175.643, + "args": { + "External id": 509479,"Record function id": 0, "Ev Idx": 17515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144327998.919, "dur": 1120.222, + "args": { + "External id": 509480,"Sequence number": 5516265, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17516 + } + }, + { + "ph": "s", "id": 422, "pid": 2338711, "tid": 2338711, "ts": 6274144327998.919, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144328094.189, "dur": 52.797, + "args": { + "External id": 509481,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144328162.090, "dur": 106.200, + "args": { + "External id": 509482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144328281.196, "dur": 40.006, + "args": { + "External id": 509483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144328330.085, "dur": 30.176, + "args": { + "External id": 509484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144328389.864, "dur": 26.316, + "args": { + "External id": 509485,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144328439.761, "dur": 19.394, + "args": { + "External id": 509486,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144328486.858, "dur": 153.855, + "args": { + "External id": 509487,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144328539.257, "dur": 11.745, + "args": { + "External id": 509488,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144328544.839, "dur": 5.299, + "args": { + "External id": 509489,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144328554.153, "dur": 4.640, + "args": { + "External id": 509490,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144328560.205, "dur": 0.851, + "args": { + "External id": 509491,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144328564.243, "dur": 5.590, + "args": { + "External id": 509492,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144328652.897, "dur": 54.135, + "args": { + "External id": 509493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144328741.931, "dur": 34.044, + "args": { + "External id": 509494,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144328786.297, "dur": 46.940, + "args": { + "External id": 509495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144328842.737, "dur": 34.900, + "args": { + "External id": 509496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144328901.523, "dur": 29.703, + "args": { + "External id": 509497,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144328938.936, "dur": 38.346, + "args": { + "External id": 509498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144328995.012, "dur": 18.997, + "args": { + "External id": 509499,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17535 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338711, "tid": 2338711, + "ts": 6274144329205.082, "dur": 76.400, + "args": { + "External id": 509500,"Record function id": 0, "Ev Idx": 17536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144329355.714, "dur": 46.229, + "args": { + "External id": 509501,"Record function id": 0, "Ev Idx": 17537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274144329410.750, "dur": 31404.228, + "args": { + "External id": 509502,"Record function id": 0, "Ev Idx": 17538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274144329418.064, "dur": 1012.558, + "args": { + "External id": 509503,"Record function id": 0, "Ev Idx": 17539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144329497.321, "dur": 9.509, + "args": { + "External id": 509504,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144329519.639, "dur": 39.833, + "args": { + "External id": 509505,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329525.305, "dur": 2.254, + "args": { + "External id": 509506,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329532.529, "dur": 0.481, + "args": { + "External id": 509507,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329534.647, "dur": 0.342, + "args": { + "External id": 509508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329535.870, "dur": 0.352, + "args": { + "External id": 509509,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329540.403, "dur": 0.454, + "args": { + "External id": 509510,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329542.345, "dur": 0.817, + "args": { + "External id": 509511,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329545.120, "dur": 4.624, + "args": { + "External id": 509512,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329551.295, "dur": 0.243, + "args": { + "External id": 509513,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329553.217, "dur": 0.337, + "args": { + "External id": 509514,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144329570.714, "dur": 74.049, + "args": { + "External id": 509515,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144329697.312, "dur": 157.214, + "args": { + "External id": 509516,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144329727.970, "dur": 7.306, + "args": { + "External id": 509517,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144329741.110, "dur": 11.680, + "args": { + "External id": 509518,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144329745.769, "dur": 6.603, + "args": { + "External id": 509519,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329749.820, "dur": 0.804, + "args": { + "External id": 509520,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144329760.236, "dur": 35.093, + "args": { + "External id": 509521,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329762.299, "dur": 0.654, + "args": { + "External id": 509522,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329764.844, "dur": 2.502, + "args": { + "External id": 509523,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329768.665, "dur": 0.516, + "args": { + "External id": 509524,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329771.364, "dur": 2.581, + "args": { + "External id": 509525,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329780.975, "dur": 0.526, + "args": { + "External id": 509526,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329782.675, "dur": 0.396, + "args": { + "External id": 509527,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329784.341, "dur": 0.582, + "args": { + "External id": 509528,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329788.090, "dur": 0.349, + "args": { + "External id": 509529,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144329789.426, "dur": 0.536, + "args": { + "External id": 509530,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144329808.077, "dur": 37.797, + "args": { + "External id": 509531,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144329912.009, "dur": 410.138, + "args": { + "External id": 509532,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144329943.508, "dur": 372.963, + "args": { + "External id": 509533,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17569, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144329954.393, "dur": 355.970, + "args": { + "External id": 509534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144330348.995, "dur": 2.685, + "args": { + "External id": 509535,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17571, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274144330452.953, "dur": 30154.773, + "args": { + "External id": 509536,"Record function id": 0, "Ev Idx": 17572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330552.700, "dur": 6.424, + "args": { + "External id": 509537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330562.855, "dur": 1.124, + "args": { + "External id": 509538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330565.702, "dur": 3.237, + "args": { + "External id": 509539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330570.815, "dur": 0.837, + "args": { + "External id": 509540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330573.017, "dur": 0.995, + "args": { + "External id": 509541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330577.211, "dur": 0.574, + "args": { + "External id": 509542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330579.566, "dur": 0.763, + "args": { + "External id": 509543,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330581.859, "dur": 2.492, + "args": { + "External id": 509544,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330601.705, "dur": 1.390, + "args": { + "External id": 509545,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144330608.515, "dur": 0.618, + "args": { + "External id": 509546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144330628.189, "dur": 29920.395, + "args": { + "External id": 509547,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144330644.537, "dur": 29895.803, + "args": { + "External id": 509548,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144330659.960, "dur": 16.051, + "args": { + "External id": 509549,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144330679.405, "dur": 29823.911, + "args": { + "External id": 509550,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144330682.133, "dur": 29820.560, + "args": { + "External id": 509551,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144330689.056, "dur": 5.989, + "args": { + "External id": 509552,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144330697.027, "dur": 29802.498, + "args": { + "External id": 509553,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144360745.726, "dur": 37.950, + "args": { + "External id": 509554,"Sequence number": 5516266, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17590 + } + }, + { + "ph": "s", "id": 421, "pid": 2338711, "tid": 2338711, "ts": 6274144360745.726, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144360770.309, "dur": 8.652, + "args": { + "External id": 509555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144360774.462, "dur": 4.302, + "args": { + "External id": 509556,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144360855.443, "dur": 71.169, + "args": { + "External id": 509557,"Record function id": 0, "Ev Idx": 17593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144360928.219, "dur": 1185.448, + "args": { + "External id": 509558,"Record function id": 0, "Ev Idx": 17594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144360971.065, "dur": 1080.233, + "args": { + "External id": 509559,"Sequence number": 5516267, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17595 + } + }, + { + "ph": "s", "id": 420, "pid": 2338711, "tid": 2338711, "ts": 6274144360971.065, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144361040.692, "dur": 85.124, + "args": { + "External id": 509560,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144361141.951, "dur": 106.576, + "args": { + "External id": 509561,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144361264.240, "dur": 36.660, + "args": { + "External id": 509562,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144361312.430, "dur": 29.720, + "args": { + "External id": 509563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144361376.463, "dur": 27.942, + "args": { + "External id": 509564,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144361424.513, "dur": 17.360, + "args": { + "External id": 509565,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144361464.387, "dur": 162.848, + "args": { + "External id": 509566,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144361519.273, "dur": 11.420, + "args": { + "External id": 509567,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144361524.708, "dur": 5.079, + "args": { + "External id": 509568,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144361533.662, "dur": 4.359, + "args": { + "External id": 509569,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144361539.540, "dur": 0.874, + "args": { + "External id": 509570,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144361545.080, "dur": 5.847, + "args": { + "External id": 509571,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144361639.757, "dur": 57.901, + "args": { + "External id": 509572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144361730.709, "dur": 29.197, + "args": { + "External id": 509573,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144361771.868, "dur": 43.075, + "args": { + "External id": 509574,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144361821.462, "dur": 34.935, + "args": { + "External id": 509575,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144361880.613, "dur": 25.419, + "args": { + "External id": 509576,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144361911.906, "dur": 34.558, + "args": { + "External id": 509577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144361963.206, "dur": 17.405, + "args": { + "External id": 509578,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338711, "tid": 2338711, + "ts": 6274144362184.575, "dur": 79.915, + "args": { + "External id": 509579,"Record function id": 0, "Ev Idx": 17615 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144362341.294, "dur": 48.424, + "args": { + "External id": 509580,"Record function id": 0, "Ev Idx": 17616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274144362398.594, "dur": 30722.517, + "args": { + "External id": 509581,"Record function id": 0, "Ev Idx": 17617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274144362406.527, "dur": 964.491, + "args": { + "External id": 509582,"Record function id": 0, "Ev Idx": 17618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144362489.498, "dur": 9.283, + "args": { + "External id": 509583,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144362512.338, "dur": 43.446, + "args": { + "External id": 509584,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362518.499, "dur": 2.496, + "args": { + "External id": 509585,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362525.777, "dur": 0.305, + "args": { + "External id": 509586,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362530.038, "dur": 0.440, + "args": { + "External id": 509587,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362532.051, "dur": 0.548, + "args": { + "External id": 509588,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362535.742, "dur": 0.611, + "args": { + "External id": 509589,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362538.257, "dur": 0.595, + "args": { + "External id": 509590,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362540.080, "dur": 4.776, + "args": { + "External id": 509591,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362546.664, "dur": 0.596, + "args": { + "External id": 509592,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362548.508, "dur": 0.376, + "args": { + "External id": 509593,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144362567.674, "dur": 71.655, + "args": { + "External id": 509594,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144362676.574, "dur": 129.106, + "args": { + "External id": 509595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144362689.458, "dur": 5.539, + "args": { + "External id": 509596,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144362702.524, "dur": 11.652, + "args": { + "External id": 509597,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144362707.318, "dur": 6.453, + "args": { + "External id": 509598,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362711.364, "dur": 0.887, + "args": { + "External id": 509599,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144362721.650, "dur": 32.993, + "args": { + "External id": 509600,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362723.843, "dur": 2.649, + "args": { + "External id": 509601,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362728.349, "dur": 0.740, + "args": { + "External id": 509602,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362730.524, "dur": 0.399, + "args": { + "External id": 509603,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362734.865, "dur": 2.334, + "args": { + "External id": 509604,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362738.183, "dur": 0.694, + "args": { + "External id": 509605,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362740.244, "dur": 0.576, + "args": { + "External id": 509606,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362743.650, "dur": 0.576, + "args": { + "External id": 509607,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362746.000, "dur": 0.548, + "args": { + "External id": 509608,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144362747.974, "dur": 1.990, + "args": { + "External id": 509609,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144362765.565, "dur": 32.358, + "args": { + "External id": 509610,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144362859.870, "dur": 409.171, + "args": { + "External id": 509611,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144362890.686, "dur": 372.262, + "args": { + "External id": 509612,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144362900.751, "dur": 353.053, + "args": { + "External id": 509613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144363295.914, "dur": 2.267, + "args": { + "External id": 509614,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17650, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274144363392.679, "dur": 29507.958, + "args": { + "External id": 509615,"Record function id": 0, "Ev Idx": 17651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363491.419, "dur": 6.344, + "args": { + "External id": 509616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363501.136, "dur": 1.059, + "args": { + "External id": 509617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363504.129, "dur": 3.228, + "args": { + "External id": 509618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363508.951, "dur": 0.824, + "args": { + "External id": 509619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363511.527, "dur": 0.809, + "args": { + "External id": 509620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363513.571, "dur": 0.661, + "args": { + "External id": 509621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363518.096, "dur": 1.140, + "args": { + "External id": 509622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363520.705, "dur": 2.643, + "args": { + "External id": 509623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363524.647, "dur": 0.704, + "args": { + "External id": 509624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144363527.063, "dur": 0.747, + "args": { + "External id": 509625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144363548.218, "dur": 29305.587, + "args": { + "External id": 509626,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144363563.977, "dur": 29281.793, + "args": { + "External id": 509627,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144363577.803, "dur": 34.588, + "args": { + "External id": 509628,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144363616.909, "dur": 29192.598, + "args": { + "External id": 509629,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144363619.659, "dur": 29189.188, + "args": { + "External id": 509630,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144363626.855, "dur": 6.662, + "args": { + "External id": 509631,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144363635.609, "dur": 29170.054, + "args": { + "External id": 509632,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144393037.713, "dur": 56.055, + "args": { + "External id": 509633,"Sequence number": 5516268, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17669 + } + }, + { + "ph": "s", "id": 419, "pid": 2338711, "tid": 2338711, "ts": 6274144393037.713, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144393079.040, "dur": 9.479, + "args": { + "External id": 509634,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144393083.273, "dur": 4.926, + "args": { + "External id": 509635,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144393162.514, "dur": 72.312, + "args": { + "External id": 509636,"Record function id": 0, "Ev Idx": 17672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144393236.250, "dur": 1128.294, + "args": { + "External id": 509637,"Record function id": 0, "Ev Idx": 17673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144393276.408, "dur": 1074.020, + "args": { + "External id": 509638,"Sequence number": 5516269, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17674 + } + }, + { + "ph": "s", "id": 418, "pid": 2338711, "tid": 2338711, "ts": 6274144393276.408, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144393346.341, "dur": 50.660, + "args": { + "External id": 509639,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144393408.302, "dur": 102.194, + "args": { + "External id": 509640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144393521.913, "dur": 41.098, + "args": { + "External id": 509641,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144393572.628, "dur": 50.719, + "args": { + "External id": 509642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144393653.575, "dur": 26.043, + "args": { + "External id": 509643,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144393700.491, "dur": 16.581, + "args": { + "External id": 509644,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144393740.829, "dur": 131.595, + "args": { + "External id": 509645,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144393791.275, "dur": 11.951, + "args": { + "External id": 509646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144393796.975, "dur": 5.469, + "args": { + "External id": 509647,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144393806.392, "dur": 4.299, + "args": { + "External id": 509648,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144393812.222, "dur": 1.134, + "args": { + "External id": 509649,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144393816.386, "dur": 5.463, + "args": { + "External id": 509650,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144393883.061, "dur": 50.339, + "args": { + "External id": 509651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144393964.461, "dur": 27.065, + "args": { + "External id": 509652,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144394002.279, "dur": 41.928, + "args": { + "External id": 509653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144394099.277, "dur": 42.318, + "args": { + "External id": 509654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144394171.231, "dur": 27.376, + "args": { + "External id": 509655,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144394207.620, "dur": 36.347, + "args": { + "External id": 509656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144394261.658, "dur": 17.767, + "args": { + "External id": 509657,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338711, "tid": 2338711, + "ts": 6274144394432.724, "dur": 80.205, + "args": { + "External id": 509658,"Record function id": 0, "Ev Idx": 17694 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144394613.850, "dur": 51.999, + "args": { + "External id": 509659,"Record function id": 0, "Ev Idx": 17695 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274144394674.767, "dur": 31937.347, + "args": { + "External id": 509660,"Record function id": 0, "Ev Idx": 17696 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274144394683.093, "dur": 978.751, + "args": { + "External id": 509661,"Record function id": 0, "Ev Idx": 17697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144394772.087, "dur": 10.260, + "args": { + "External id": 509662,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144394796.638, "dur": 40.683, + "args": { + "External id": 509663,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394803.018, "dur": 2.150, + "args": { + "External id": 509664,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394809.403, "dur": 0.415, + "args": { + "External id": 509665,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394811.293, "dur": 0.880, + "args": { + "External id": 509666,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394813.676, "dur": 0.597, + "args": { + "External id": 509667,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394818.069, "dur": 0.646, + "args": { + "External id": 509668,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394819.855, "dur": 0.765, + "args": { + "External id": 509669,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394822.538, "dur": 4.629, + "args": { + "External id": 509670,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394828.189, "dur": 0.449, + "args": { + "External id": 509671,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394830.284, "dur": 0.553, + "args": { + "External id": 509672,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144394849.307, "dur": 54.578, + "args": { + "External id": 509673,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144394940.849, "dur": 182.592, + "args": { + "External id": 509674,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144394951.635, "dur": 3.803, + "args": { + "External id": 509675,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144394960.594, "dur": 10.960, + "args": { + "External id": 509676,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144394965.164, "dur": 5.996, + "args": { + "External id": 509677,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394969.155, "dur": 0.862, + "args": { + "External id": 509678,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144394982.436, "dur": 33.081, + "args": { + "External id": 509679,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394984.408, "dur": 2.771, + "args": { + "External id": 509680,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394988.930, "dur": 0.506, + "args": { + "External id": 509681,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394991.108, "dur": 0.460, + "args": { + "External id": 509682,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394995.355, "dur": 2.397, + "args": { + "External id": 509683,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144394999.158, "dur": 0.434, + "args": { + "External id": 509684,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144395001.429, "dur": 0.529, + "args": { + "External id": 509685,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144395005.229, "dur": 0.552, + "args": { + "External id": 509686,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144395007.102, "dur": 0.438, + "args": { + "External id": 509687,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144395008.981, "dur": 2.357, + "args": { + "External id": 509688,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144395028.262, "dur": 84.303, + "args": { + "External id": 509689,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144395182.468, "dur": 359.724, + "args": { + "External id": 509690,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144395214.704, "dur": 322.455, + "args": { + "External id": 509691,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17727, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144395225.113, "dur": 306.589, + "args": { + "External id": 509692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144395565.301, "dur": 2.498, + "args": { + "External id": 509693,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17729, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274144395686.014, "dur": 30714.053, + "args": { + "External id": 509694,"Record function id": 0, "Ev Idx": 17730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395784.795, "dur": 6.825, + "args": { + "External id": 509695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395795.186, "dur": 0.871, + "args": { + "External id": 509696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395797.826, "dur": 3.200, + "args": { + "External id": 509697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395803.258, "dur": 1.030, + "args": { + "External id": 509698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395806.030, "dur": 0.774, + "args": { + "External id": 509699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395808.287, "dur": 1.044, + "args": { + "External id": 509700,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395813.315, "dur": 1.247, + "args": { + "External id": 509701,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395816.293, "dur": 2.175, + "args": { + "External id": 509702,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395819.828, "dur": 1.083, + "args": { + "External id": 509703,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144395822.799, "dur": 0.756, + "args": { + "External id": 509704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144395844.797, "dur": 30510.123, + "args": { + "External id": 509705,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144395861.424, "dur": 30485.528, + "args": { + "External id": 509706,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144395876.970, "dur": 17.629, + "args": { + "External id": 509707,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144395898.179, "dur": 30414.376, + "args": { + "External id": 509708,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144395901.109, "dur": 30410.769, + "args": { + "External id": 509709,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144395908.024, "dur": 5.638, + "args": { + "External id": 509710,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144395915.252, "dur": 30393.507, + "args": { + "External id": 509711,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144426539.465, "dur": 31.830, + "args": { + "External id": 509712,"Sequence number": 5516270, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17748 + } + }, + { + "ph": "s", "id": 417, "pid": 2338711, "tid": 2338711, "ts": 6274144426539.465, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144426558.221, "dur": 8.187, + "args": { + "External id": 509713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144426562.112, "dur": 4.150, + "args": { + "External id": 509714,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144426656.718, "dur": 72.380, + "args": { + "External id": 509715,"Record function id": 0, "Ev Idx": 17751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144426730.271, "dur": 1148.469, + "args": { + "External id": 509716,"Record function id": 0, "Ev Idx": 17752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144426770.215, "dur": 1095.280, + "args": { + "External id": 509717,"Sequence number": 5516271, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17753 + } + }, + { + "ph": "s", "id": 416, "pid": 2338711, "tid": 2338711, "ts": 6274144426770.215, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144426839.080, "dur": 52.108, + "args": { + "External id": 509718,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144426903.602, "dur": 107.388, + "args": { + "External id": 509719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144427024.899, "dur": 76.273, + "args": { + "External id": 509720,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144427113.506, "dur": 34.127, + "args": { + "External id": 509721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144427178.123, "dur": 29.316, + "args": { + "External id": 509722,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144427225.781, "dur": 17.887, + "args": { + "External id": 509723,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144427268.849, "dur": 140.587, + "args": { + "External id": 509724,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144427324.348, "dur": 11.628, + "args": { + "External id": 509725,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144427329.871, "dur": 5.331, + "args": { + "External id": 509726,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144427338.597, "dur": 4.226, + "args": { + "External id": 509727,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144427344.454, "dur": 1.271, + "args": { + "External id": 509728,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144427349.911, "dur": 5.503, + "args": { + "External id": 509729,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144427420.319, "dur": 48.568, + "args": { + "External id": 509730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144427502.567, "dur": 29.589, + "args": { + "External id": 509731,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144427542.749, "dur": 41.813, + "args": { + "External id": 509732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144427611.337, "dur": 40.220, + "args": { + "External id": 509733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144427676.062, "dur": 31.052, + "args": { + "External id": 509734,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144427713.169, "dur": 39.973, + "args": { + "External id": 509735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144427771.552, "dur": 22.990, + "args": { + "External id": 509736,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338711, "tid": 2338711, + "ts": 6274144427945.838, "dur": 79.952, + "args": { + "External id": 509737,"Record function id": 0, "Ev Idx": 17773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144428141.340, "dur": 54.128, + "args": { + "External id": 509738,"Record function id": 0, "Ev Idx": 17774 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274144428205.008, "dur": 30937.595, + "args": { + "External id": 509739,"Record function id": 0, "Ev Idx": 17775 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274144428212.904, "dur": 1002.003, + "args": { + "External id": 509740,"Record function id": 0, "Ev Idx": 17776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144428295.417, "dur": 8.470, + "args": { + "External id": 509741,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144428317.588, "dur": 46.811, + "args": { + "External id": 509742,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428323.526, "dur": 2.501, + "args": { + "External id": 509743,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428331.565, "dur": 0.546, + "args": { + "External id": 509744,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428333.754, "dur": 0.632, + "args": { + "External id": 509745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428336.149, "dur": 0.395, + "args": { + "External id": 509746,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428340.489, "dur": 0.519, + "args": { + "External id": 509747,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428346.431, "dur": 0.429, + "args": { + "External id": 509748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428348.181, "dur": 3.557, + "args": { + "External id": 509749,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428353.757, "dur": 0.532, + "args": { + "External id": 509750,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428357.685, "dur": 0.396, + "args": { + "External id": 509751,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144428375.802, "dur": 56.063, + "args": { + "External id": 509752,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144428464.897, "dur": 143.051, + "args": { + "External id": 509753,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144428475.721, "dur": 3.858, + "args": { + "External id": 509754,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144428486.758, "dur": 12.851, + "args": { + "External id": 509755,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144428491.117, "dur": 8.082, + "args": { + "External id": 509756,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428494.880, "dur": 2.963, + "args": { + "External id": 509757,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144428506.569, "dur": 31.946, + "args": { + "External id": 509758,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428508.631, "dur": 0.558, + "args": { + "External id": 509759,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428510.970, "dur": 0.959, + "args": { + "External id": 509760,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428512.865, "dur": 0.451, + "args": { + "External id": 509761,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428516.869, "dur": 2.727, + "args": { + "External id": 509762,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428520.712, "dur": 0.343, + "args": { + "External id": 509763,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428522.984, "dur": 2.808, + "args": { + "External id": 509764,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428526.833, "dur": 0.438, + "args": { + "External id": 509765,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428529.291, "dur": 0.533, + "args": { + "External id": 509766,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144428533.524, "dur": 0.540, + "args": { + "External id": 509767,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144428549.935, "dur": 33.301, + "args": { + "External id": 509768,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144428666.860, "dur": 433.729, + "args": { + "External id": 509769,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144428698.118, "dur": 396.030, + "args": { + "External id": 509770,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17806, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144428708.796, "dur": 377.153, + "args": { + "External id": 509771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144429127.145, "dur": 3.007, + "args": { + "External id": 509772,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17808, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274144429240.406, "dur": 29674.139, + "args": { + "External id": 509773,"Record function id": 0, "Ev Idx": 17809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429344.356, "dur": 7.041, + "args": { + "External id": 509774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429355.383, "dur": 1.378, + "args": { + "External id": 509775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429358.404, "dur": 3.806, + "args": { + "External id": 509776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429363.912, "dur": 1.127, + "args": { + "External id": 509777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429366.299, "dur": 0.755, + "args": { + "External id": 509778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429370.555, "dur": 0.986, + "args": { + "External id": 509779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429373.084, "dur": 0.773, + "args": { + "External id": 509780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429375.368, "dur": 2.196, + "args": { + "External id": 509781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429378.788, "dur": 0.816, + "args": { + "External id": 509782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144429383.131, "dur": 0.607, + "args": { + "External id": 509783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144429401.828, "dur": 29470.003, + "args": { + "External id": 509784,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144429417.454, "dur": 29447.058, + "args": { + "External id": 509785,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144429432.110, "dur": 15.935, + "args": { + "External id": 509786,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144429451.528, "dur": 29377.319, + "args": { + "External id": 509787,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144429454.440, "dur": 29373.824, + "args": { + "External id": 509788,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144429460.478, "dur": 5.444, + "args": { + "External id": 509789,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144429467.744, "dur": 29357.296, + "args": { + "External id": 509790,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144459052.088, "dur": 56.227, + "args": { + "External id": 509791,"Sequence number": 5516272, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17827 + } + }, + { + "ph": "s", "id": 415, "pid": 2338711, "tid": 2338711, "ts": 6274144459052.088, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144459094.623, "dur": 8.966, + "args": { + "External id": 509792,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144459098.589, "dur": 4.616, + "args": { + "External id": 509793,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144459187.785, "dur": 72.578, + "args": { + "External id": 509794,"Record function id": 0, "Ev Idx": 17830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144459261.494, "dur": 1139.992, + "args": { + "External id": 509795,"Record function id": 0, "Ev Idx": 17831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144459299.380, "dur": 1087.928, + "args": { + "External id": 509796,"Sequence number": 5516273, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17832 + } + }, + { + "ph": "s", "id": 414, "pid": 2338711, "tid": 2338711, "ts": 6274144459299.380, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144459367.624, "dur": 53.318, + "args": { + "External id": 509797,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144459433.309, "dur": 109.588, + "args": { + "External id": 509798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144459554.198, "dur": 58.392, + "args": { + "External id": 509799,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144459627.617, "dur": 34.082, + "args": { + "External id": 509800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144459689.561, "dur": 29.093, + "args": { + "External id": 509801,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144459740.455, "dur": 19.237, + "args": { + "External id": 509802,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144459783.328, "dur": 135.296, + "args": { + "External id": 509803,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144459835.400, "dur": 11.592, + "args": { + "External id": 509804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144459841.066, "dur": 5.109, + "args": { + "External id": 509805,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144459849.768, "dur": 4.165, + "args": { + "External id": 509806,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144459855.259, "dur": 1.337, + "args": { + "External id": 509807,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144459859.411, "dur": 5.549, + "args": { + "External id": 509808,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144459929.228, "dur": 50.128, + "args": { + "External id": 509809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144460013.276, "dur": 29.301, + "args": { + "External id": 509810,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144460085.590, "dur": 48.573, + "args": { + "External id": 509811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144460146.926, "dur": 35.476, + "args": { + "External id": 509812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144460208.982, "dur": 26.963, + "args": { + "External id": 509813,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144460244.366, "dur": 36.279, + "args": { + "External id": 509814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144460299.884, "dur": 17.438, + "args": { + "External id": 509815,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338711, "tid": 2338711, + "ts": 6274144460467.626, "dur": 80.298, + "args": { + "External id": 509816,"Record function id": 0, "Ev Idx": 17852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144460649.872, "dur": 53.189, + "args": { + "External id": 509817,"Record function id": 0, "Ev Idx": 17853 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274144460711.989, "dur": 30690.850, + "args": { + "External id": 509818,"Record function id": 0, "Ev Idx": 17854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274144460719.728, "dur": 965.555, + "args": { + "External id": 509819,"Record function id": 0, "Ev Idx": 17855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144460803.391, "dur": 9.654, + "args": { + "External id": 509820,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144460826.400, "dur": 42.845, + "args": { + "External id": 509821,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460832.641, "dur": 2.630, + "args": { + "External id": 509822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460840.416, "dur": 0.425, + "args": { + "External id": 509823,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460842.403, "dur": 0.696, + "args": { + "External id": 509824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460844.652, "dur": 0.424, + "args": { + "External id": 509825,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460849.125, "dur": 0.501, + "args": { + "External id": 509826,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460851.130, "dur": 0.568, + "args": { + "External id": 509827,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460853.058, "dur": 5.429, + "args": { + "External id": 509828,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460859.561, "dur": 0.584, + "args": { + "External id": 509829,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460862.127, "dur": 0.316, + "args": { + "External id": 509830,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144460881.065, "dur": 55.402, + "args": { + "External id": 509831,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144460969.574, "dur": 181.460, + "args": { + "External id": 509832,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144460980.897, "dur": 3.658, + "args": { + "External id": 509833,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144460989.812, "dur": 11.067, + "args": { + "External id": 509834,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144460994.602, "dur": 5.852, + "args": { + "External id": 509835,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144460998.670, "dur": 0.567, + "args": { + "External id": 509836,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144461007.603, "dur": 32.287, + "args": { + "External id": 509837,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461009.452, "dur": 3.059, + "args": { + "External id": 509838,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461013.925, "dur": 0.516, + "args": { + "External id": 509839,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461015.833, "dur": 0.469, + "args": { + "External id": 509840,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461019.379, "dur": 2.579, + "args": { + "External id": 509841,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461023.418, "dur": 0.440, + "args": { + "External id": 509842,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461025.200, "dur": 0.389, + "args": { + "External id": 509843,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461029.512, "dur": 0.346, + "args": { + "External id": 509844,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461031.114, "dur": 0.336, + "args": { + "External id": 509845,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461033.448, "dur": 2.143, + "args": { + "External id": 509846,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144461052.359, "dur": 88.351, + "args": { + "External id": 509847,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144461208.907, "dur": 357.801, + "args": { + "External id": 509848,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144461242.489, "dur": 318.493, + "args": { + "External id": 509849,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17885, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144461252.267, "dur": 303.338, + "args": { + "External id": 509850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144461606.714, "dur": 3.389, + "args": { + "External id": 509851,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17887, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274144461706.939, "dur": 29501.960, + "args": { + "External id": 509852,"Record function id": 0, "Ev Idx": 17888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461806.950, "dur": 7.254, + "args": { + "External id": 509853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461817.897, "dur": 1.140, + "args": { + "External id": 509854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461820.669, "dur": 3.248, + "args": { + "External id": 509855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461825.617, "dur": 0.838, + "args": { + "External id": 509856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461827.775, "dur": 0.709, + "args": { + "External id": 509857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461829.892, "dur": 0.781, + "args": { + "External id": 509858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461834.258, "dur": 0.848, + "args": { + "External id": 509859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461837.054, "dur": 2.020, + "args": { + "External id": 509860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461840.469, "dur": 0.939, + "args": { + "External id": 509861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144461843.159, "dur": 0.876, + "args": { + "External id": 509862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144461872.011, "dur": 29293.316, + "args": { + "External id": 509863,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144461888.703, "dur": 29268.334, + "args": { + "External id": 509864,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144461902.924, "dur": 15.951, + "args": { + "External id": 509865,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144461922.581, "dur": 29200.458, + "args": { + "External id": 509866,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144461925.341, "dur": 29196.927, + "args": { + "External id": 509867,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144461931.784, "dur": 5.902, + "args": { + "External id": 509868,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144461939.383, "dur": 29179.880, + "args": { + "External id": 509869,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144491346.417, "dur": 30.794, + "args": { + "External id": 509870,"Sequence number": 5516274, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17906 + } + }, + { + "ph": "s", "id": 413, "pid": 2338711, "tid": 2338711, "ts": 6274144491346.417, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144491364.088, "dur": 8.589, + "args": { + "External id": 509871,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144491368.074, "dur": 4.379, + "args": { + "External id": 509872,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144491443.396, "dur": 72.851, + "args": { + "External id": 509873,"Record function id": 0, "Ev Idx": 17909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144491517.653, "dur": 1168.760, + "args": { + "External id": 509874,"Record function id": 0, "Ev Idx": 17910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144491560.399, "dur": 1111.570, + "args": { + "External id": 509875,"Sequence number": 5516275, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17911 + } + }, + { + "ph": "s", "id": 412, "pid": 2338711, "tid": 2338711, "ts": 6274144491560.399, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144491649.610, "dur": 52.125, + "args": { + "External id": 509876,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144491715.986, "dur": 106.369, + "args": { + "External id": 509877,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144491833.961, "dur": 37.121, + "args": { + "External id": 509878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144491880.298, "dur": 30.334, + "args": { + "External id": 509879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144491936.905, "dur": 24.991, + "args": { + "External id": 509880,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144491982.225, "dur": 18.481, + "args": { + "External id": 509881,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144492024.581, "dur": 179.593, + "args": { + "External id": 509882,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144492116.063, "dur": 12.956, + "args": { + "External id": 509883,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144492121.640, "dur": 6.418, + "args": { + "External id": 509884,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144492131.892, "dur": 4.561, + "args": { + "External id": 509885,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144492138.020, "dur": 1.114, + "args": { + "External id": 509886,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144492141.683, "dur": 6.123, + "args": { + "External id": 509887,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144492216.509, "dur": 56.394, + "args": { + "External id": 509888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144492306.164, "dur": 35.373, + "args": { + "External id": 509889,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144492351.805, "dur": 44.464, + "args": { + "External id": 509890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144492406.849, "dur": 35.025, + "args": { + "External id": 509891,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144492466.264, "dur": 26.993, + "args": { + "External id": 509892,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144492501.767, "dur": 35.843, + "args": { + "External id": 509893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144492559.656, "dur": 19.386, + "args": { + "External id": 509894,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17930 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338711, "tid": 2338711, + "ts": 6274144492754.392, "dur": 87.635, + "args": { + "External id": 509895,"Record function id": 0, "Ev Idx": 17931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144492916.178, "dur": 46.290, + "args": { + "External id": 509896,"Record function id": 0, "Ev Idx": 17932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274144492971.036, "dur": 31952.872, + "args": { + "External id": 509897,"Record function id": 0, "Ev Idx": 17933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274144492978.005, "dur": 961.625, + "args": { + "External id": 509898,"Record function id": 0, "Ev Idx": 17934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144493097.748, "dur": 12.939, + "args": { + "External id": 509899,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144493124.959, "dur": 41.856, + "args": { + "External id": 509900,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493131.579, "dur": 2.482, + "args": { + "External id": 509901,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493138.603, "dur": 0.474, + "args": { + "External id": 509902,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493140.668, "dur": 0.504, + "args": { + "External id": 509903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493142.421, "dur": 0.404, + "args": { + "External id": 509904,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493146.697, "dur": 0.545, + "args": { + "External id": 509905,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493148.510, "dur": 0.506, + "args": { + "External id": 509906,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493150.229, "dur": 4.541, + "args": { + "External id": 509907,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493155.851, "dur": 0.274, + "args": { + "External id": 509908,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493157.365, "dur": 0.523, + "args": { + "External id": 509909,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144493179.015, "dur": 55.691, + "args": { + "External id": 509910,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144493273.616, "dur": 123.548, + "args": { + "External id": 509911,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144493284.707, "dur": 5.568, + "args": { + "External id": 509912,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144493295.681, "dur": 10.882, + "args": { + "External id": 509913,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144493300.254, "dur": 5.921, + "args": { + "External id": 509914,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493304.301, "dur": 0.648, + "args": { + "External id": 509915,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144493312.904, "dur": 32.546, + "args": { + "External id": 509916,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493314.897, "dur": 0.601, + "args": { + "External id": 509917,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493317.877, "dur": 2.665, + "args": { + "External id": 509918,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493321.769, "dur": 0.407, + "args": { + "External id": 509919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493324.054, "dur": 2.891, + "args": { + "External id": 509920,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493330.256, "dur": 0.406, + "args": { + "External id": 509921,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493332.563, "dur": 0.426, + "args": { + "External id": 509922,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493334.232, "dur": 0.542, + "args": { + "External id": 509923,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493339.055, "dur": 0.469, + "args": { + "External id": 509924,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144493340.934, "dur": 0.396, + "args": { + "External id": 509925,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144493357.409, "dur": 32.060, + "args": { + "External id": 509926,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144493452.675, "dur": 380.355, + "args": { + "External id": 509927,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144493484.237, "dur": 342.761, + "args": { + "External id": 509928,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17964, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144493494.238, "dur": 326.579, + "args": { + "External id": 509929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144493862.027, "dur": 2.240, + "args": { + "External id": 509930,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17966, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274144493960.956, "dur": 30766.518, + "args": { + "External id": 509931,"Record function id": 0, "Ev Idx": 17967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494106.447, "dur": 10.525, + "args": { + "External id": 509932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494120.799, "dur": 1.394, + "args": { + "External id": 509933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494124.265, "dur": 3.521, + "args": { + "External id": 509934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494129.644, "dur": 1.062, + "args": { + "External id": 509935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494132.134, "dur": 0.968, + "args": { + "External id": 509936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494134.639, "dur": 0.784, + "args": { + "External id": 509937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494137.251, "dur": 0.839, + "args": { + "External id": 509938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494139.516, "dur": 2.016, + "args": { + "External id": 509939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494143.064, "dur": 0.873, + "args": { + "External id": 509940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144494147.733, "dur": 0.762, + "args": { + "External id": 509941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144494168.317, "dur": 30515.861, + "args": { + "External id": 509942,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144494185.406, "dur": 30490.690, + "args": { + "External id": 509943,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144494199.968, "dur": 16.682, + "args": { + "External id": 509944,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144494220.240, "dur": 30416.742, + "args": { + "External id": 509945,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144494222.837, "dur": 30413.588, + "args": { + "External id": 509946,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144494229.229, "dur": 5.957, + "args": { + "External id": 509947,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144494237.225, "dur": 30395.999, + "args": { + "External id": 509948,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144524869.377, "dur": 29.611, + "args": { + "External id": 509949,"Sequence number": 5516276, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17985 + } + }, + { + "ph": "s", "id": 411, "pid": 2338711, "tid": 2338711, "ts": 6274144524869.377, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144524886.174, "dur": 8.129, + "args": { + "External id": 509950,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144524889.901, "dur": 4.178, + "args": { + "External id": 509951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144524964.950, "dur": 69.756, + "args": { + "External id": 509952,"Record function id": 0, "Ev Idx": 17988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144525098.583, "dur": 1149.807, + "args": { + "External id": 509953,"Record function id": 0, "Ev Idx": 17989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144525141.465, "dur": 1092.270, + "args": { + "External id": 509954,"Sequence number": 5516277, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17990 + } + }, + { + "ph": "s", "id": 410, "pid": 2338711, "tid": 2338711, "ts": 6274144525141.465, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144525215.639, "dur": 51.949, + "args": { + "External id": 509955,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144525280.803, "dur": 105.223, + "args": { + "External id": 509956,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144525399.308, "dur": 39.831, + "args": { + "External id": 509957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144525445.953, "dur": 29.508, + "args": { + "External id": 509958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144525502.927, "dur": 24.353, + "args": { + "External id": 509959,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144525546.688, "dur": 17.511, + "args": { + "External id": 509960,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144525604.940, "dur": 152.730, + "args": { + "External id": 509961,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144525664.889, "dur": 15.560, + "args": { + "External id": 509962,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144525670.444, "dur": 8.967, + "args": { + "External id": 509963,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144525683.218, "dur": 4.280, + "args": { + "External id": 509964,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144525688.926, "dur": 1.313, + "args": { + "External id": 509965,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144525694.664, "dur": 5.334, + "args": { + "External id": 509966,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144525768.683, "dur": 55.294, + "args": { + "External id": 509967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144525861.130, "dur": 27.944, + "args": { + "External id": 509968,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144525899.126, "dur": 46.952, + "args": { + "External id": 509969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144525954.105, "dur": 34.500, + "args": { + "External id": 509970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144526008.969, "dur": 28.623, + "args": { + "External id": 509971,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144526043.286, "dur": 70.569, + "args": { + "External id": 509972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144526139.732, "dur": 20.044, + "args": { + "External id": 509973,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18009 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338711, "tid": 2338711, + "ts": 6274144526315.929, "dur": 80.976, + "args": { + "External id": 509974,"Record function id": 0, "Ev Idx": 18010 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144526474.701, "dur": 48.706, + "args": { + "External id": 509975,"Record function id": 0, "Ev Idx": 18011 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274144526532.907, "dur": 31353.590, + "args": { + "External id": 509976,"Record function id": 0, "Ev Idx": 18012 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274144526541.079, "dur": 983.289, + "args": { + "External id": 509977,"Record function id": 0, "Ev Idx": 18013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144526640.423, "dur": 9.751, + "args": { + "External id": 509978,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144526664.805, "dur": 41.014, + "args": { + "External id": 509979,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526671.260, "dur": 2.400, + "args": { + "External id": 509980,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526678.228, "dur": 0.430, + "args": { + "External id": 509981,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526680.336, "dur": 0.479, + "args": { + "External id": 509982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526682.398, "dur": 0.712, + "args": { + "External id": 509983,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526686.607, "dur": 0.673, + "args": { + "External id": 509984,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526688.503, "dur": 0.519, + "args": { + "External id": 509985,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526690.204, "dur": 5.119, + "args": { + "External id": 509986,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526696.988, "dur": 0.539, + "args": { + "External id": 509987,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526699.081, "dur": 0.470, + "args": { + "External id": 509988,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144526717.911, "dur": 56.140, + "args": { + "External id": 509989,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144526808.873, "dur": 141.007, + "args": { + "External id": 509990,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144526820.341, "dur": 4.116, + "args": { + "External id": 509991,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144526837.092, "dur": 19.159, + "args": { + "External id": 509992,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144526841.583, "dur": 14.238, + "args": { + "External id": 509993,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526853.808, "dur": 0.658, + "args": { + "External id": 509994,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144526863.365, "dur": 30.111, + "args": { + "External id": 509995,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526865.116, "dur": 4.165, + "args": { + "External id": 509996,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526870.856, "dur": 0.536, + "args": { + "External id": 509997,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526872.667, "dur": 0.303, + "args": { + "External id": 509998,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526876.330, "dur": 2.469, + "args": { + "External id": 509999,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526879.844, "dur": 0.404, + "args": { + "External id": 510000,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526881.707, "dur": 0.555, + "args": { + "External id": 510001,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526883.460, "dur": 0.400, + "args": { + "External id": 510002,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526885.422, "dur": 0.403, + "args": { + "External id": 510003,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144526886.865, "dur": 2.518, + "args": { + "External id": 510004,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144526909.257, "dur": 32.770, + "args": { + "External id": 510005,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144527003.884, "dur": 408.453, + "args": { + "External id": 510006,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144527034.326, "dur": 371.970, + "args": { + "External id": 510007,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18043, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144527046.562, "dur": 350.479, + "args": { + "External id": 510008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144527439.200, "dur": 2.479, + "args": { + "External id": 510009,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18045, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274144527546.748, "dur": 30145.130, + "args": { + "External id": 510010,"Record function id": 0, "Ev Idx": 18046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527673.446, "dur": 6.798, + "args": { + "External id": 510011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527683.972, "dur": 1.078, + "args": { + "External id": 510012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527687.011, "dur": 3.075, + "args": { + "External id": 510013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527691.908, "dur": 0.867, + "args": { + "External id": 510014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527694.272, "dur": 0.994, + "args": { + "External id": 510015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527696.431, "dur": 0.701, + "args": { + "External id": 510016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527698.786, "dur": 0.677, + "args": { + "External id": 510017,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527701.254, "dur": 2.302, + "args": { + "External id": 510018,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527704.821, "dur": 0.543, + "args": { + "External id": 510019,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144527709.462, "dur": 0.786, + "args": { + "External id": 510020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144527728.816, "dur": 29920.159, + "args": { + "External id": 510021,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144527744.904, "dur": 29896.020, + "args": { + "External id": 510022,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144527759.727, "dur": 17.201, + "args": { + "External id": 510023,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144527780.468, "dur": 29826.638, + "args": { + "External id": 510024,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144527783.084, "dur": 29823.325, + "args": { + "External id": 510025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144527789.331, "dur": 5.030, + "args": { + "External id": 510026,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144527795.966, "dur": 29807.432, + "args": { + "External id": 510027,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144557829.694, "dur": 32.541, + "args": { + "External id": 510028,"Sequence number": 5516278, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18064 + } + }, + { + "ph": "s", "id": 409, "pid": 2338711, "tid": 2338711, "ts": 6274144557829.694, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144557846.895, "dur": 10.528, + "args": { + "External id": 510029,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144557853.310, "dur": 3.937, + "args": { + "External id": 510030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144557927.733, "dur": 71.434, + "args": { + "External id": 510031,"Record function id": 0, "Ev Idx": 18067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144558000.236, "dur": 1146.899, + "args": { + "External id": 510032,"Record function id": 0, "Ev Idx": 18068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144558036.832, "dur": 1094.812, + "args": { + "External id": 510033,"Sequence number": 5516279, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18069 + } + }, + { + "ph": "s", "id": 408, "pid": 2338711, "tid": 2338711, "ts": 6274144558036.832, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144558134.531, "dur": 50.279, + "args": { + "External id": 510034,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144558198.829, "dur": 103.279, + "args": { + "External id": 510035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144558314.767, "dur": 37.940, + "args": { + "External id": 510036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144558361.620, "dur": 29.333, + "args": { + "External id": 510037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144558419.321, "dur": 28.395, + "args": { + "External id": 510038,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144558468.751, "dur": 15.475, + "args": { + "External id": 510039,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144558508.033, "dur": 152.853, + "args": { + "External id": 510040,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144558558.751, "dur": 11.460, + "args": { + "External id": 510041,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144558564.311, "dur": 5.019, + "args": { + "External id": 510042,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144558572.985, "dur": 4.207, + "args": { + "External id": 510043,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144558578.429, "dur": 1.190, + "args": { + "External id": 510044,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144558582.296, "dur": 23.057, + "args": { + "External id": 510045,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144558675.894, "dur": 54.128, + "args": { + "External id": 510046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144558763.372, "dur": 29.088, + "args": { + "External id": 510047,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144558801.863, "dur": 41.844, + "args": { + "External id": 510048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144558854.374, "dur": 34.981, + "args": { + "External id": 510049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144558916.672, "dur": 25.905, + "args": { + "External id": 510050,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144558949.853, "dur": 38.185, + "args": { + "External id": 510051,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144559005.775, "dur": 19.602, + "args": { + "External id": 510052,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18088 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338711, "tid": 2338711, + "ts": 6274144559217.128, "dur": 83.768, + "args": { + "External id": 510053,"Record function id": 0, "Ev Idx": 18089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144559375.989, "dur": 47.860, + "args": { + "External id": 510054,"Record function id": 0, "Ev Idx": 18090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274144559433.206, "dur": 31015.517, + "args": { + "External id": 510055,"Record function id": 0, "Ev Idx": 18091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274144559442.394, "dur": 952.375, + "args": { + "External id": 510056,"Record function id": 0, "Ev Idx": 18092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144559530.767, "dur": 9.263, + "args": { + "External id": 510057,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144559553.906, "dur": 53.012, + "args": { + "External id": 510058,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559559.842, "dur": 2.677, + "args": { + "External id": 510059,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559567.094, "dur": 0.645, + "args": { + "External id": 510060,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559569.171, "dur": 0.531, + "args": { + "External id": 510061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559571.081, "dur": 0.510, + "args": { + "External id": 510062,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559574.282, "dur": 0.714, + "args": { + "External id": 510063,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559576.458, "dur": 0.495, + "args": { + "External id": 510064,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559578.299, "dur": 2.878, + "args": { + "External id": 510065,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559582.634, "dur": 0.469, + "args": { + "External id": 510066,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559584.756, "dur": 0.531, + "args": { + "External id": 510067,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144559620.935, "dur": 58.663, + "args": { + "External id": 510068,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144559715.633, "dur": 123.589, + "args": { + "External id": 510069,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144559727.535, "dur": 5.106, + "args": { + "External id": 510070,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144559738.092, "dur": 13.010, + "args": { + "External id": 510071,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144559742.581, "dur": 8.114, + "args": { + "External id": 510072,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559748.629, "dur": 0.732, + "args": { + "External id": 510073,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144559758.245, "dur": 29.630, + "args": { + "External id": 510074,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559760.114, "dur": 2.403, + "args": { + "External id": 510075,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559763.966, "dur": 0.521, + "args": { + "External id": 510076,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559765.564, "dur": 0.365, + "args": { + "External id": 510077,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559768.689, "dur": 2.688, + "args": { + "External id": 510078,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559772.591, "dur": 0.379, + "args": { + "External id": 510079,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559774.621, "dur": 0.356, + "args": { + "External id": 510080,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559778.039, "dur": 0.369, + "args": { + "External id": 510081,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559780.254, "dur": 0.294, + "args": { + "External id": 510082,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144559781.491, "dur": 2.441, + "args": { + "External id": 510083,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144559799.088, "dur": 31.944, + "args": { + "External id": 510084,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144559893.190, "dur": 395.893, + "args": { + "External id": 510085,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144559924.677, "dur": 358.094, + "args": { + "External id": 510086,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18122, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144559935.086, "dur": 341.031, + "args": { + "External id": 510087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144560317.603, "dur": 2.288, + "args": { + "External id": 510088,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18124, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274144560416.294, "dur": 29829.238, + "args": { + "External id": 510089,"Record function id": 0, "Ev Idx": 18125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560517.188, "dur": 6.652, + "args": { + "External id": 510090,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560527.413, "dur": 1.018, + "args": { + "External id": 510091,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560530.051, "dur": 3.233, + "args": { + "External id": 510092,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560535.082, "dur": 0.673, + "args": { + "External id": 510093,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560537.095, "dur": 0.882, + "args": { + "External id": 510094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560539.269, "dur": 0.908, + "args": { + "External id": 510095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560542.030, "dur": 0.845, + "args": { + "External id": 510096,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560544.863, "dur": 2.292, + "args": { + "External id": 510097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560548.762, "dur": 0.545, + "args": { + "External id": 510098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144560552.463, "dur": 0.657, + "args": { + "External id": 510099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144560570.998, "dur": 29628.971, + "args": { + "External id": 510100,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144560610.718, "dur": 29580.834, + "args": { + "External id": 510101,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144560626.783, "dur": 16.704, + "args": { + "External id": 510102,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144560647.139, "dur": 29504.993, + "args": { + "External id": 510103,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144560650.178, "dur": 29501.265, + "args": { + "External id": 510104,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144560656.232, "dur": 10.524, + "args": { + "External id": 510105,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144560668.323, "dur": 29479.638, + "args": { + "External id": 510106,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144590391.218, "dur": 31.960, + "args": { + "External id": 510107,"Sequence number": 5516280, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18143 + } + }, + { + "ph": "s", "id": 407, "pid": 2338711, "tid": 2338711, "ts": 6274144590391.218, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144590409.384, "dur": 9.100, + "args": { + "External id": 510108,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144590413.676, "dur": 4.600, + "args": { + "External id": 510109,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144590490.327, "dur": 70.300, + "args": { + "External id": 510110,"Record function id": 0, "Ev Idx": 18146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144590562.319, "dur": 1136.519, + "args": { + "External id": 510111,"Record function id": 0, "Ev Idx": 18147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144590615.378, "dur": 1069.692, + "args": { + "External id": 510112,"Sequence number": 5516281, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18148 + } + }, + { + "ph": "s", "id": 406, "pid": 2338711, "tid": 2338711, "ts": 6274144590615.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144590685.518, "dur": 50.859, + "args": { + "External id": 510113,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144590749.012, "dur": 106.038, + "args": { + "External id": 510114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144590867.097, "dur": 36.820, + "args": { + "External id": 510115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144590913.212, "dur": 30.979, + "args": { + "External id": 510116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144590969.234, "dur": 27.174, + "args": { + "External id": 510117,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144591017.821, "dur": 18.640, + "args": { + "External id": 510118,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144591093.807, "dur": 147.778, + "args": { + "External id": 510119,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144591149.405, "dur": 14.732, + "args": { + "External id": 510120,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144591156.679, "dur": 6.531, + "args": { + "External id": 510121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144591166.934, "dur": 4.345, + "args": { + "External id": 510122,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144591175.517, "dur": 1.062, + "args": { + "External id": 510123,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144591179.026, "dur": 5.645, + "args": { + "External id": 510124,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144591253.041, "dur": 52.847, + "args": { + "External id": 510125,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144591339.193, "dur": 28.375, + "args": { + "External id": 510126,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144591377.462, "dur": 44.255, + "args": { + "External id": 510127,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144591432.466, "dur": 35.240, + "args": { + "External id": 510128,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144591488.902, "dur": 26.811, + "args": { + "External id": 510129,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144591524.023, "dur": 36.608, + "args": { + "External id": 510130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144591579.842, "dur": 34.340, + "args": { + "External id": 510131,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18167 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338711, "tid": 2338711, + "ts": 6274144591766.212, "dur": 75.916, + "args": { + "External id": 510132,"Record function id": 0, "Ev Idx": 18168 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144591920.415, "dur": 49.365, + "args": { + "External id": 510133,"Record function id": 0, "Ev Idx": 18169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274144591979.101, "dur": 31561.952, + "args": { + "External id": 510134,"Record function id": 0, "Ev Idx": 18170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274144591987.408, "dur": 943.435, + "args": { + "External id": 510135,"Record function id": 0, "Ev Idx": 18171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144592103.728, "dur": 9.666, + "args": { + "External id": 510136,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144592128.288, "dur": 40.372, + "args": { + "External id": 510137,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592134.356, "dur": 2.608, + "args": { + "External id": 510138,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592140.995, "dur": 0.697, + "args": { + "External id": 510139,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592143.268, "dur": 0.474, + "args": { + "External id": 510140,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592144.818, "dur": 0.651, + "args": { + "External id": 510141,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592149.332, "dur": 0.503, + "args": { + "External id": 510142,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592150.949, "dur": 0.583, + "args": { + "External id": 510143,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592153.358, "dur": 5.077, + "args": { + "External id": 510144,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592159.915, "dur": 0.424, + "args": { + "External id": 510145,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592161.782, "dur": 0.465, + "args": { + "External id": 510146,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144592180.415, "dur": 56.639, + "args": { + "External id": 510147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144592272.852, "dur": 129.166, + "args": { + "External id": 510148,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144592284.033, "dur": 3.864, + "args": { + "External id": 510149,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144592296.346, "dur": 10.334, + "args": { + "External id": 510150,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144592300.671, "dur": 5.613, + "args": { + "External id": 510151,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592304.334, "dur": 0.698, + "args": { + "External id": 510152,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144592313.866, "dur": 30.737, + "args": { + "External id": 510153,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592316.169, "dur": 2.336, + "args": { + "External id": 510154,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592320.052, "dur": 0.457, + "args": { + "External id": 510155,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592321.687, "dur": 0.355, + "args": { + "External id": 510156,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592325.384, "dur": 2.908, + "args": { + "External id": 510157,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592329.582, "dur": 0.421, + "args": { + "External id": 510158,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592332.087, "dur": 0.250, + "args": { + "External id": 510159,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592334.691, "dur": 0.387, + "args": { + "External id": 510160,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592336.551, "dur": 0.345, + "args": { + "External id": 510161,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144592338.236, "dur": 2.250, + "args": { + "External id": 510162,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144592360.055, "dur": 34.114, + "args": { + "External id": 510163,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144592462.371, "dur": 355.121, + "args": { + "External id": 510164,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144592494.745, "dur": 316.802, + "args": { + "External id": 510165,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18201, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144592504.848, "dur": 300.372, + "args": { + "External id": 510166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144592849.395, "dur": 2.811, + "args": { + "External id": 510167,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18203, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274144592953.308, "dur": 30389.030, + "args": { + "External id": 510168,"Record function id": 0, "Ev Idx": 18204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593088.877, "dur": 7.750, + "args": { + "External id": 510169,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593102.277, "dur": 1.048, + "args": { + "External id": 510170,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593105.285, "dur": 2.955, + "args": { + "External id": 510171,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593109.984, "dur": 0.991, + "args": { + "External id": 510172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593112.409, "dur": 0.676, + "args": { + "External id": 510173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593114.368, "dur": 0.476, + "args": { + "External id": 510174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593116.472, "dur": 0.774, + "args": { + "External id": 510175,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593119.136, "dur": 2.372, + "args": { + "External id": 510176,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593122.902, "dur": 0.514, + "args": { + "External id": 510177,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144593127.264, "dur": 0.822, + "args": { + "External id": 510178,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144593148.096, "dur": 30150.987, + "args": { + "External id": 510179,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144593164.635, "dur": 30127.020, + "args": { + "External id": 510180,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144593184.326, "dur": 17.113, + "args": { + "External id": 510181,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144593205.155, "dur": 30050.842, + "args": { + "External id": 510182,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144593207.728, "dur": 30047.687, + "args": { + "External id": 510183,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144593213.484, "dur": 6.261, + "args": { + "External id": 510184,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144593221.335, "dur": 30031.189, + "args": { + "External id": 510185,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144623481.184, "dur": 32.711, + "args": { + "External id": 510186,"Sequence number": 5516282, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18222 + } + }, + { + "ph": "s", "id": 405, "pid": 2338711, "tid": 2338711, "ts": 6274144623481.184, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144623501.019, "dur": 8.143, + "args": { + "External id": 510187,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144623504.894, "dur": 4.066, + "args": { + "External id": 510188,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144623581.427, "dur": 83.397, + "args": { + "External id": 510189,"Record function id": 0, "Ev Idx": 18225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144623667.074, "dur": 1141.571, + "args": { + "External id": 510190,"Record function id": 0, "Ev Idx": 18226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144623712.916, "dur": 1081.269, + "args": { + "External id": 510191,"Sequence number": 5516283, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18227 + } + }, + { + "ph": "s", "id": 404, "pid": 2338711, "tid": 2338711, "ts": 6274144623712.916, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144623789.473, "dur": 50.100, + "args": { + "External id": 510192,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144623852.268, "dur": 104.193, + "args": { + "External id": 510193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144623971.865, "dur": 40.124, + "args": { + "External id": 510194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144624021.231, "dur": 65.260, + "args": { + "External id": 510195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144624119.024, "dur": 30.456, + "args": { + "External id": 510196,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144624171.945, "dur": 16.932, + "args": { + "External id": 510197,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144624213.735, "dur": 133.238, + "args": { + "External id": 510198,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144624265.299, "dur": 11.797, + "args": { + "External id": 510199,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144624271.033, "dur": 5.326, + "args": { + "External id": 510200,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144624279.885, "dur": 4.473, + "args": { + "External id": 510201,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144624285.647, "dur": 1.272, + "args": { + "External id": 510202,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144624289.586, "dur": 6.317, + "args": { + "External id": 510203,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144624357.052, "dur": 52.804, + "args": { + "External id": 510204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144624443.098, "dur": 28.418, + "args": { + "External id": 510205,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144624480.956, "dur": 42.112, + "args": { + "External id": 510206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144624532.851, "dur": 35.104, + "args": { + "External id": 510207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144624604.318, "dur": 27.827, + "args": { + "External id": 510208,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144624640.525, "dur": 39.335, + "args": { + "External id": 510209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144624702.020, "dur": 18.928, + "args": { + "External id": 510210,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338711, "tid": 2338711, + "ts": 6274144624880.668, "dur": 85.801, + "args": { + "External id": 510211,"Record function id": 0, "Ev Idx": 18247 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144625045.304, "dur": 84.370, + "args": { + "External id": 510212,"Record function id": 0, "Ev Idx": 18248 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274144625140.449, "dur": 30191.779, + "args": { + "External id": 510213,"Record function id": 0, "Ev Idx": 18249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274144625148.345, "dur": 958.169, + "args": { + "External id": 510214,"Record function id": 0, "Ev Idx": 18250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144625233.743, "dur": 9.834, + "args": { + "External id": 510215,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144625257.577, "dur": 39.993, + "args": { + "External id": 510216,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625263.803, "dur": 2.397, + "args": { + "External id": 510217,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625270.219, "dur": 0.332, + "args": { + "External id": 510218,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625272.369, "dur": 0.594, + "args": { + "External id": 510219,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625274.501, "dur": 0.508, + "args": { + "External id": 510220,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625278.933, "dur": 0.494, + "args": { + "External id": 510221,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625280.716, "dur": 0.567, + "args": { + "External id": 510222,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625283.167, "dur": 4.827, + "args": { + "External id": 510223,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625289.172, "dur": 0.579, + "args": { + "External id": 510224,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625291.047, "dur": 0.416, + "args": { + "External id": 510225,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144625309.532, "dur": 60.274, + "args": { + "External id": 510226,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144625402.963, "dur": 128.559, + "args": { + "External id": 510227,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144625414.843, "dur": 4.127, + "args": { + "External id": 510228,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144625427.039, "dur": 10.301, + "args": { + "External id": 510229,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144625431.473, "dur": 5.424, + "args": { + "External id": 510230,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625435.266, "dur": 0.374, + "args": { + "External id": 510231,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144625443.838, "dur": 32.907, + "args": { + "External id": 510232,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625446.113, "dur": 2.796, + "args": { + "External id": 510233,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625450.276, "dur": 0.403, + "args": { + "External id": 510234,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625452.056, "dur": 0.439, + "args": { + "External id": 510235,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625455.643, "dur": 3.053, + "args": { + "External id": 510236,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625462.482, "dur": 0.457, + "args": { + "External id": 510237,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625463.812, "dur": 0.315, + "args": { + "External id": 510238,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625466.799, "dur": 0.330, + "args": { + "External id": 510239,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625468.696, "dur": 0.471, + "args": { + "External id": 510240,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144625470.173, "dur": 2.546, + "args": { + "External id": 510241,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144625491.068, "dur": 32.656, + "args": { + "External id": 510242,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144625602.859, "dur": 367.850, + "args": { + "External id": 510243,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144625634.516, "dur": 330.993, + "args": { + "External id": 510244,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18280, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144625645.506, "dur": 311.039, + "args": { + "External id": 510245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144625995.169, "dur": 2.314, + "args": { + "External id": 510246,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18282, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274144626129.894, "dur": 29008.945, + "args": { + "External id": 510247,"Record function id": 0, "Ev Idx": 18283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626230.804, "dur": 6.820, + "args": { + "External id": 510248,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626241.248, "dur": 1.140, + "args": { + "External id": 510249,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626243.907, "dur": 3.019, + "args": { + "External id": 510250,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626248.822, "dur": 0.863, + "args": { + "External id": 510251,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626254.712, "dur": 0.992, + "args": { + "External id": 510252,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626256.979, "dur": 0.846, + "args": { + "External id": 510253,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626259.615, "dur": 0.701, + "args": { + "External id": 510254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626261.815, "dur": 2.195, + "args": { + "External id": 510255,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626265.615, "dur": 0.836, + "args": { + "External id": 510256,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144626270.121, "dur": 0.718, + "args": { + "External id": 510257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144626289.814, "dur": 28807.272, + "args": { + "External id": 510258,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144626306.258, "dur": 28782.576, + "args": { + "External id": 510259,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144626325.064, "dur": 16.081, + "args": { + "External id": 510260,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144626344.689, "dur": 28679.488, + "args": { + "External id": 510261,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144626347.259, "dur": 28676.161, + "args": { + "External id": 510262,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144626353.349, "dur": 5.877, + "args": { + "External id": 510263,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144626361.129, "dur": 28658.970, + "args": { + "External id": 510264,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144655275.664, "dur": 31.735, + "args": { + "External id": 510265,"Sequence number": 5516284, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18301 + } + }, + { + "ph": "s", "id": 403, "pid": 2338711, "tid": 2338711, "ts": 6274144655275.664, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144655294.271, "dur": 8.493, + "args": { + "External id": 510266,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144655298.421, "dur": 4.140, + "args": { + "External id": 510267,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144655372.506, "dur": 76.196, + "args": { + "External id": 510268,"Record function id": 0, "Ev Idx": 18304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144655450.229, "dur": 1152.338, + "args": { + "External id": 510269,"Record function id": 0, "Ev Idx": 18305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144655489.130, "dur": 1085.315, + "args": { + "External id": 510270,"Sequence number": 5516285, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18306 + } + }, + { + "ph": "s", "id": 402, "pid": 2338711, "tid": 2338711, "ts": 6274144655489.130, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144655556.493, "dur": 64.965, + "args": { + "External id": 510271,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144655638.073, "dur": 106.040, + "args": { + "External id": 510272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144655756.831, "dur": 41.242, + "args": { + "External id": 510273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144655807.053, "dur": 29.621, + "args": { + "External id": 510274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144655863.926, "dur": 29.793, + "args": { + "External id": 510275,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144655916.920, "dur": 19.535, + "args": { + "External id": 510276,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144655960.128, "dur": 170.423, + "args": { + "External id": 510277,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144656010.438, "dur": 11.274, + "args": { + "External id": 510278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144656015.707, "dur": 5.222, + "args": { + "External id": 510279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144656024.912, "dur": 4.009, + "args": { + "External id": 510280,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144656030.654, "dur": 1.113, + "args": { + "External id": 510281,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144656034.292, "dur": 5.189, + "args": { + "External id": 510282,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144656144.114, "dur": 56.914, + "args": { + "External id": 510283,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144656238.297, "dur": 34.735, + "args": { + "External id": 510284,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144656283.282, "dur": 42.213, + "args": { + "External id": 510285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144656335.896, "dur": 34.942, + "args": { + "External id": 510286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144656392.639, "dur": 30.346, + "args": { + "External id": 510287,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144656430.665, "dur": 35.558, + "args": { + "External id": 510288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144656485.876, "dur": 18.521, + "args": { + "External id": 510289,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338711, "tid": 2338711, + "ts": 6274144656673.183, "dur": 80.040, + "args": { + "External id": 510290,"Record function id": 0, "Ev Idx": 18326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144656832.093, "dur": 51.608, + "args": { + "External id": 510291,"Record function id": 0, "Ev Idx": 18327 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274144656892.129, "dur": 31469.485, + "args": { + "External id": 510292,"Record function id": 0, "Ev Idx": 18328 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274144656899.107, "dur": 976.953, + "args": { + "External id": 510293,"Record function id": 0, "Ev Idx": 18329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144656983.080, "dur": 9.492, + "args": { + "External id": 510294,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144657006.122, "dur": 42.178, + "args": { + "External id": 510295,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657012.293, "dur": 2.531, + "args": { + "External id": 510296,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657019.130, "dur": 0.373, + "args": { + "External id": 510297,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657020.951, "dur": 0.376, + "args": { + "External id": 510298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657022.936, "dur": 0.600, + "args": { + "External id": 510299,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657027.708, "dur": 0.599, + "args": { + "External id": 510300,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657029.448, "dur": 0.312, + "args": { + "External id": 510301,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657033.562, "dur": 5.085, + "args": { + "External id": 510302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657039.948, "dur": 0.356, + "args": { + "External id": 510303,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657041.797, "dur": 0.417, + "args": { + "External id": 510304,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144657094.982, "dur": 58.164, + "args": { + "External id": 510305,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144657191.054, "dur": 131.257, + "args": { + "External id": 510306,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144657203.219, "dur": 4.895, + "args": { + "External id": 510307,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144657213.290, "dur": 11.686, + "args": { + "External id": 510308,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144657217.744, "dur": 6.818, + "args": { + "External id": 510309,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657221.862, "dur": 0.857, + "args": { + "External id": 510310,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144657232.473, "dur": 31.598, + "args": { + "External id": 510311,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657234.848, "dur": 2.500, + "args": { + "External id": 510312,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657239.012, "dur": 0.590, + "args": { + "External id": 510313,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657241.066, "dur": 0.613, + "args": { + "External id": 510314,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657245.342, "dur": 2.953, + "args": { + "External id": 510315,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657249.490, "dur": 0.420, + "args": { + "External id": 510316,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657251.202, "dur": 0.364, + "args": { + "External id": 510317,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657254.552, "dur": 0.403, + "args": { + "External id": 510318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657256.296, "dur": 0.370, + "args": { + "External id": 510319,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144657257.708, "dur": 2.216, + "args": { + "External id": 510320,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144657277.655, "dur": 36.632, + "args": { + "External id": 510321,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144657383.174, "dur": 386.328, + "args": { + "External id": 510322,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144657414.927, "dur": 348.495, + "args": { + "External id": 510323,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18359, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144657425.403, "dur": 331.516, + "args": { + "External id": 510324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144657797.981, "dur": 2.505, + "args": { + "External id": 510325,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18361, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274144657896.534, "dur": 30265.111, + "args": { + "External id": 510326,"Record function id": 0, "Ev Idx": 18362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658000.401, "dur": 6.149, + "args": { + "External id": 510327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658013.065, "dur": 1.217, + "args": { + "External id": 510328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658016.191, "dur": 3.145, + "args": { + "External id": 510329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658021.250, "dur": 1.037, + "args": { + "External id": 510330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658023.629, "dur": 0.784, + "args": { + "External id": 510331,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658025.664, "dur": 0.800, + "args": { + "External id": 510332,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658027.927, "dur": 0.926, + "args": { + "External id": 510333,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658030.697, "dur": 2.283, + "args": { + "External id": 510334,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658034.630, "dur": 0.606, + "args": { + "External id": 510335,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144658038.976, "dur": 0.629, + "args": { + "External id": 510336,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144658091.484, "dur": 30025.567, + "args": { + "External id": 510337,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144658108.796, "dur": 29999.685, + "args": { + "External id": 510338,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144658125.750, "dur": 18.061, + "args": { + "External id": 510339,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144658147.611, "dur": 29900.703, + "args": { + "External id": 510340,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144658150.264, "dur": 29897.420, + "args": { + "External id": 510341,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144658156.179, "dur": 6.644, + "args": { + "External id": 510342,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144658164.487, "dur": 29879.971, + "args": { + "External id": 510343,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144688304.472, "dur": 30.868, + "args": { + "External id": 510344,"Sequence number": 5516286, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18380 + } + }, + { + "ph": "s", "id": 401, "pid": 2338711, "tid": 2338711, "ts": 6274144688304.472, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144688321.562, "dur": 8.993, + "args": { + "External id": 510345,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144688325.519, "dur": 4.793, + "args": { + "External id": 510346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144688402.079, "dur": 69.304, + "args": { + "External id": 510347,"Record function id": 0, "Ev Idx": 18383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144688472.591, "dur": 1152.342, + "args": { + "External id": 510348,"Record function id": 0, "Ev Idx": 18384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144688510.042, "dur": 1099.864, + "args": { + "External id": 510349,"Sequence number": 5516287, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18385 + } + }, + { + "ph": "s", "id": 400, "pid": 2338711, "tid": 2338711, "ts": 6274144688510.042, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144688579.143, "dur": 68.440, + "args": { + "External id": 510350,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144688663.375, "dur": 104.944, + "args": { + "External id": 510351,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144688781.319, "dur": 37.455, + "args": { + "External id": 510352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144688828.345, "dur": 29.248, + "args": { + "External id": 510353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144688885.015, "dur": 29.282, + "args": { + "External id": 510354,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144688936.812, "dur": 17.514, + "args": { + "External id": 510355,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144688978.715, "dur": 180.369, + "args": { + "External id": 510356,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144689032.948, "dur": 11.347, + "args": { + "External id": 510357,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144689038.360, "dur": 5.185, + "args": { + "External id": 510358,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144689047.320, "dur": 4.093, + "args": { + "External id": 510359,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144689052.495, "dur": 38.496, + "args": { + "External id": 510360,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144689095.192, "dur": 5.926, + "args": { + "External id": 510361,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144689170.874, "dur": 54.846, + "args": { + "External id": 510362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144689259.458, "dur": 32.336, + "args": { + "External id": 510363,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144689301.924, "dur": 41.795, + "args": { + "External id": 510364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144689354.904, "dur": 34.737, + "args": { + "External id": 510365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144689412.383, "dur": 28.405, + "args": { + "External id": 510366,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144689448.242, "dur": 35.588, + "args": { + "External id": 510367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144689503.726, "dur": 19.464, + "args": { + "External id": 510368,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18404 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338711, "tid": 2338711, + "ts": 6274144689692.364, "dur": 78.417, + "args": { + "External id": 510369,"Record function id": 0, "Ev Idx": 18405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144689844.738, "dur": 46.941, + "args": { + "External id": 510370,"Record function id": 0, "Ev Idx": 18406 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274144689900.015, "dur": 31505.206, + "args": { + "External id": 510371,"Record function id": 0, "Ev Idx": 18407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274144689908.553, "dur": 965.812, + "args": { + "External id": 510372,"Record function id": 0, "Ev Idx": 18408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144689990.893, "dur": 8.592, + "args": { + "External id": 510373,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144690013.311, "dur": 73.362, + "args": { + "External id": 510374,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690019.667, "dur": 2.386, + "args": { + "External id": 510375,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690026.009, "dur": 0.576, + "args": { + "External id": 510376,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690028.273, "dur": 0.483, + "args": { + "External id": 510377,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690029.849, "dur": 0.464, + "args": { + "External id": 510378,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690034.058, "dur": 0.695, + "args": { + "External id": 510379,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690035.853, "dur": 0.620, + "args": { + "External id": 510380,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690037.973, "dur": 4.790, + "args": { + "External id": 510381,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690044.156, "dur": 0.428, + "args": { + "External id": 510382,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690046.202, "dur": 0.404, + "args": { + "External id": 510383,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144690101.836, "dur": 58.792, + "args": { + "External id": 510384,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144690197.767, "dur": 134.038, + "args": { + "External id": 510385,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144690212.774, "dur": 5.511, + "args": { + "External id": 510386,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144690223.357, "dur": 10.814, + "args": { + "External id": 510387,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144690227.997, "dur": 5.761, + "args": { + "External id": 510388,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690231.526, "dur": 0.750, + "args": { + "External id": 510389,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144690241.652, "dur": 35.281, + "args": { + "External id": 510390,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690243.956, "dur": 2.251, + "args": { + "External id": 510391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690247.468, "dur": 0.428, + "args": { + "External id": 510392,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690249.611, "dur": 0.270, + "args": { + "External id": 510393,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690253.761, "dur": 2.569, + "args": { + "External id": 510394,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690257.699, "dur": 0.520, + "args": { + "External id": 510395,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690263.262, "dur": 0.403, + "args": { + "External id": 510396,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690267.569, "dur": 0.262, + "args": { + "External id": 510397,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690269.046, "dur": 0.334, + "args": { + "External id": 510398,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144690270.714, "dur": 2.336, + "args": { + "External id": 510399,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144690290.816, "dur": 33.148, + "args": { + "External id": 510400,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144690387.078, "dur": 381.281, + "args": { + "External id": 510401,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144690417.834, "dur": 344.658, + "args": { + "External id": 510402,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18438, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144690429.653, "dur": 326.706, + "args": { + "External id": 510403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144690795.987, "dur": 2.577, + "args": { + "External id": 510404,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18440, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274144690895.773, "dur": 30314.796, + "args": { + "External id": 510405,"Record function id": 0, "Ev Idx": 18441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144690997.848, "dur": 6.627, + "args": { + "External id": 510406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691007.994, "dur": 1.047, + "args": { + "External id": 510407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691010.755, "dur": 3.857, + "args": { + "External id": 510408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691016.237, "dur": 0.790, + "args": { + "External id": 510409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691018.491, "dur": 0.863, + "args": { + "External id": 510410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691020.776, "dur": 0.962, + "args": { + "External id": 510411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691023.119, "dur": 0.788, + "args": { + "External id": 510412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691025.856, "dur": 2.216, + "args": { + "External id": 510413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691029.371, "dur": 0.781, + "args": { + "External id": 510414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144691034.050, "dur": 0.747, + "args": { + "External id": 510415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144691086.758, "dur": 30080.068, + "args": { + "External id": 510416,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144691105.083, "dur": 30053.707, + "args": { + "External id": 510417,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144691120.354, "dur": 16.981, + "args": { + "External id": 510418,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144691140.997, "dur": 29981.473, + "args": { + "External id": 510419,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144691143.814, "dur": 29978.082, + "args": { + "External id": 510420,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144691150.037, "dur": 6.891, + "args": { + "External id": 510421,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144691158.915, "dur": 29959.787, + "args": { + "External id": 510422,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144721347.961, "dur": 30.837, + "args": { + "External id": 510423,"Sequence number": 5516288, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18459 + } + }, + { + "ph": "s", "id": 399, "pid": 2338711, "tid": 2338711, "ts": 6274144721347.961, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144721364.995, "dur": 9.106, + "args": { + "External id": 510424,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144721369.306, "dur": 4.549, + "args": { + "External id": 510425,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144721446.014, "dur": 69.838, + "args": { + "External id": 510426,"Record function id": 0, "Ev Idx": 18462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144721517.407, "dur": 1138.401, + "args": { + "External id": 510427,"Record function id": 0, "Ev Idx": 18463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144721555.066, "dur": 1085.897, + "args": { + "External id": 510428,"Sequence number": 5516289, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18464 + } + }, + { + "ph": "s", "id": 398, "pid": 2338711, "tid": 2338711, "ts": 6274144721555.066, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144721639.018, "dur": 52.075, + "args": { + "External id": 510429,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144721704.268, "dur": 102.340, + "args": { + "External id": 510430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144721817.982, "dur": 38.036, + "args": { + "External id": 510431,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144721865.688, "dur": 30.660, + "args": { + "External id": 510432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144721921.284, "dur": 28.938, + "args": { + "External id": 510433,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144721973.050, "dur": 20.512, + "args": { + "External id": 510434,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144722016.747, "dur": 173.264, + "args": { + "External id": 510435,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144722104.983, "dur": 12.422, + "args": { + "External id": 510436,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144722110.403, "dur": 6.041, + "args": { + "External id": 510437,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144722120.175, "dur": 3.997, + "args": { + "External id": 510438,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144722125.538, "dur": 0.839, + "args": { + "External id": 510439,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144722128.917, "dur": 5.509, + "args": { + "External id": 510440,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144722202.024, "dur": 55.167, + "args": { + "External id": 510441,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144722291.483, "dur": 30.881, + "args": { + "External id": 510442,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144722332.265, "dur": 42.211, + "args": { + "External id": 510443,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144722388.416, "dur": 35.632, + "args": { + "External id": 510444,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144722450.420, "dur": 25.733, + "args": { + "External id": 510445,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144722484.206, "dur": 35.378, + "args": { + "External id": 510446,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144722537.508, "dur": 16.905, + "args": { + "External id": 510447,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18483 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338711, "tid": 2338711, + "ts": 6274144722726.208, "dur": 82.783, + "args": { + "External id": 510448,"Record function id": 0, "Ev Idx": 18484 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338711, "tid": 2338711, + "ts": 6274144722884.524, "dur": 49.405, + "args": { + "External id": 510449,"Record function id": 0, "Ev Idx": 18485 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274144722942.748, "dur": 31005.188, + "args": { + "External id": 510450,"Record function id": 0, "Ev Idx": 18486 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274144722951.531, "dur": 1010.899, + "args": { + "External id": 510451,"Record function id": 0, "Ev Idx": 18487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144723036.348, "dur": 8.762, + "args": { + "External id": 510452,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144723095.698, "dur": 36.656, + "args": { + "External id": 510453,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723101.772, "dur": 2.436, + "args": { + "External id": 510454,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723108.329, "dur": 0.389, + "args": { + "External id": 510455,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723109.705, "dur": 0.514, + "args": { + "External id": 510456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723111.411, "dur": 0.501, + "args": { + "External id": 510457,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723115.028, "dur": 0.591, + "args": { + "External id": 510458,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723116.718, "dur": 0.530, + "args": { + "External id": 510459,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723118.872, "dur": 3.939, + "args": { + "External id": 510460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723124.109, "dur": 0.263, + "args": { + "External id": 510461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723125.717, "dur": 0.361, + "args": { + "External id": 510462,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144723145.289, "dur": 57.385, + "args": { + "External id": 510463,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338711, "tid": 2338711, + "ts": 6274144723239.043, "dur": 177.711, + "args": { + "External id": 510464,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144723285.646, "dur": 7.326, + "args": { + "External id": 510465,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338711, "tid": 2338711, + "ts": 6274144723298.591, "dur": 11.450, + "args": { + "External id": 510466,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144723302.919, "dur": 6.701, + "args": { + "External id": 510467,"Record function id": 0, "Concrete Inputs": ["", "0", "136320000", "163584000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723307.840, "dur": 0.537, + "args": { + "External id": 510468,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338711, "tid": 2338711, + "ts": 6274144723318.010, "dur": 41.430, + "args": { + "External id": 510469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723327.930, "dur": 0.529, + "args": { + "External id": 510470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "136320000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723338.615, "dur": 0.276, + "args": { + "External id": 510471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "136320512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723339.966, "dur": 0.319, + "args": { + "External id": 510472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138417664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723341.531, "dur": 4.349, + "args": { + "External id": 510473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "138941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723346.744, "dur": 0.280, + "args": { + "External id": 510474,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "139466240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723348.230, "dur": 0.247, + "args": { + "External id": 510475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "141563392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723351.393, "dur": 0.300, + "args": { + "External id": 510476,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "141563904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723353.305, "dur": 0.247, + "args": { + "External id": 510477,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "148903936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144723355.006, "dur": 0.301, + "args": { + "External id": 510478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "156243968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144723373.278, "dur": 35.717, + "args": { + "External id": 510479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338711, "tid": 2338711, + "ts": 6274144723473.796, "dur": 383.917, + "args": { + "External id": 510480,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144723505.083, "dur": 346.234, + "args": { + "External id": 510481,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18517, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338711, "tid": 2338711, + "ts": 6274144723515.334, "dur": 329.830, + "args": { + "External id": 510482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274144723885.835, "dur": 2.185, + "args": { + "External id": 510483,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18519, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274144723982.804, "dur": 29765.883, + "args": { + "External id": 510484,"Record function id": 0, "Ev Idx": 18520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724122.202, "dur": 7.543, + "args": { + "External id": 510485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724133.851, "dur": 1.308, + "args": { + "External id": 510486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724137.119, "dur": 3.930, + "args": { + "External id": 510487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724142.678, "dur": 0.681, + "args": { + "External id": 510488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724144.705, "dur": 0.917, + "args": { + "External id": 510489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724146.923, "dur": 0.899, + "args": { + "External id": 510490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724149.540, "dur": 0.971, + "args": { + "External id": 510491,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724152.410, "dur": 2.302, + "args": { + "External id": 510492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724155.948, "dur": 0.865, + "args": { + "External id": 510493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144724160.978, "dur": 0.660, + "args": { + "External id": 510494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144724180.285, "dur": 29523.478, + "args": { + "External id": 510495,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144724197.429, "dur": 29498.089, + "args": { + "External id": 510496,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144724213.724, "dur": 16.890, + "args": { + "External id": 510497,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144724234.325, "dur": 29427.150, + "args": { + "External id": 510498,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144724237.176, "dur": 29423.679, + "args": { + "External id": 510499,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144724243.102, "dur": 5.432, + "args": { + "External id": 510500,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144724250.723, "dur": 29406.964, + "args": { + "External id": 510501,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144753889.434, "dur": 33.430, + "args": { + "External id": 510502,"Sequence number": 5516290, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18538 + } + }, + { + "ph": "s", "id": 397, "pid": 2338711, "tid": 2338711, "ts": 6274144753889.434, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144753909.381, "dur": 8.781, + "args": { + "External id": 510503,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144753913.385, "dur": 4.573, + "args": { + "External id": 510504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144753989.819, "dur": 102.985, + "args": { + "External id": 510505,"Record function id": 0, "Ev Idx": 18541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338711, "tid": 2338711, + "ts": 6274144754095.133, "dur": 1128.109, + "args": { + "External id": 510506,"Record function id": 0, "Ev Idx": 18542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144754135.612, "dur": 1073.850, + "args": { + "External id": 510507,"Sequence number": 5516291, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18543 + } + }, + { + "ph": "s", "id": 396, "pid": 2338711, "tid": 2338711, "ts": 6274144754135.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144754206.720, "dur": 53.915, + "args": { + "External id": 510508,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144754274.040, "dur": 104.649, + "args": { + "External id": 510509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144754389.889, "dur": 37.227, + "args": { + "External id": 510510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144754435.442, "dur": 29.972, + "args": { + "External id": 510511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144754490.020, "dur": 27.826, + "args": { + "External id": 510512,"kernel_hash": "cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/s7/cs7d77kfxexutexux7tghpgl5bqciqcmuxbdn7jqxz7dsk6wandx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338711, "tid": 2338711, + "ts": 6274144754541.300, "dur": 21.359, + "args": { + "External id": 510513,"kernel_hash": "cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ob/cob3bvej3r5r4b5x5w4xy3o5tdwdykbslcqvioi6atonuh4asxtl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144754604.374, "dur": 136.707, + "args": { + "External id": 510514,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144754657.096, "dur": 12.386, + "args": { + "External id": 510515,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144754662.558, "dur": 5.973, + "args": { + "External id": 510516,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144754672.637, "dur": 3.683, + "args": { + "External id": 510517,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144754677.584, "dur": 1.002, + "args": { + "External id": 510518,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144754681.358, "dur": 5.520, + "args": { + "External id": 510519,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144754752.125, "dur": 53.050, + "args": { + "External id": 510520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338711, "tid": 2338711, + "ts": 6274144754836.955, "dur": 32.021, + "args": { + "External id": 510521,"kernel_hash": "cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/x4/cx4jnswz47j5getzegxeoo5bl65yhl6we4qxaupsfvecmonuhf2x.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144754878.704, "dur": 42.135, + "args": { + "External id": 510522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144754931.174, "dur": 35.092, + "args": { + "External id": 510523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144754987.813, "dur": 25.677, + "args": { + "External id": 510524,"kernel_hash": "cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/qx/cqx3wrlxigfm267ogmmi2epgc6irvfwk5fahu3bjwljvydrf3yw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144755021.708, "dur": 69.309, + "args": { + "External id": 510525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338711, "tid": 2338711, + "ts": 6274144755117.211, "dur": 22.199, + "args": { + "External id": 510526,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338711, "tid": 2338711, + "ts": 6274144755289.369, "dur": 38.652, + "args": { + "External id": 510527,"Record function id": 0, "Ev Idx": 18563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144755463.548, "dur": 298.684, + "args": { + "External id": 510528,"Sequence number": 5516292, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18564 + } + }, + { + "ph": "s", "id": 395, "pid": 2338711, "tid": 2338711, "ts": 6274144755463.548, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144755495.157, "dur": 8.861, + "args": { + "External id": 510529,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144755497.957, "dur": 5.832, + "args": { + "External id": 510530,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144755513.251, "dur": 13.028, + "args": { + "External id": 510531,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144755517.100, "dur": 8.551, + "args": { + "External id": 510532,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144755536.293, "dur": 4.274, + "args": { + "External id": 510533,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144755740.277, "dur": 8.361, + "args": { + "External id": 510534,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144755744.493, "dur": 3.718, + "args": { + "External id": 510535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144755790.583, "dur": 134.778, + "args": { + "External id": 510536,"Sequence number": 5516293, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144755792.555, "dur": 15.422, + "args": { + "External id": 510537,"Sequence number": 5516293, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18573 + } + }, + { + "ph": "s", "id": 394, "pid": 2338711, "tid": 2338711, "ts": 6274144755792.555, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144755797.755, "dur": 8.351, + "args": { + "External id": 510538,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144755803.433, "dur": 2.338, + "args": { + "External id": 510539,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144755810.205, "dur": 114.881, + "args": { + "External id": 510540,"Sequence number": 5516294, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144755813.135, "dur": 4.834, + "args": { + "External id": 510541,"Sequence number": 5516294, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144755813.762, "dur": 4.065, + "args": { + "External id": 510542,"Sequence number": 5516294, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18578 + } + }, + { + "ph": "s", "id": 393, "pid": 2338711, "tid": 2338711, "ts": 6274144755813.762, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144755822.167, "dur": 92.560, + "args": { + "External id": 510543,"Sequence number": 5516295, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18579 + } + }, + { + "ph": "s", "id": 392, "pid": 2338711, "tid": 2338711, "ts": 6274144755822.167, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144755918.114, "dur": 5.948, + "args": { + "External id": 510544,"Sequence number": 5516296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18580 + } + }, + { + "ph": "s", "id": 391, "pid": 2338711, "tid": 2338711, "ts": 6274144755918.114, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144755937.334, "dur": 73.162, + "args": { + "External id": 510545,"Sequence number": 5516297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144755938.310, "dur": 9.309, + "args": { + "External id": 510546,"Sequence number": 5516297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18582 + } + }, + { + "ph": "s", "id": 390, "pid": 2338711, "tid": 2338711, "ts": 6274144755938.310, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144755940.373, "dur": 6.170, + "args": { + "External id": 510547,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144755945.214, "dur": 0.857, + "args": { + "External id": 510548,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144755948.456, "dur": 61.760, + "args": { + "External id": 510549,"Sequence number": 5516298, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144755949.541, "dur": 5.850, + "args": { + "External id": 510550,"Sequence number": 5516298, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144755950.587, "dur": 4.627, + "args": { + "External id": 510551,"Sequence number": 5516298, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18587 + } + }, + { + "ph": "s", "id": 389, "pid": 2338711, "tid": 2338711, "ts": 6274144755950.587, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144755956.338, "dur": 46.807, + "args": { + "External id": 510552,"Sequence number": 5516299, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18588 + } + }, + { + "ph": "s", "id": 388, "pid": 2338711, "tid": 2338711, "ts": 6274144755956.338, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756005.283, "dur": 4.537, + "args": { + "External id": 510553,"Sequence number": 5516300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18589 + } + }, + { + "ph": "s", "id": 387, "pid": 2338711, "tid": 2338711, "ts": 6274144756005.283, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144756018.283, "dur": 109.995, + "args": { + "External id": 510554,"Sequence number": 5516301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144756019.149, "dur": 6.152, + "args": { + "External id": 510555,"Sequence number": 5516301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18591 + } + }, + { + "ph": "s", "id": 386, "pid": 2338711, "tid": 2338711, "ts": 6274144756019.149, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144756021.309, "dur": 2.353, + "args": { + "External id": 510556,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144756023.033, "dur": 0.516, + "args": { + "External id": 510557,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144756028.423, "dur": 99.634, + "args": { + "External id": 510558,"Sequence number": 5516302, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144756029.740, "dur": 5.498, + "args": { + "External id": 510559,"Sequence number": 5516302, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756030.547, "dur": 4.538, + "args": { + "External id": 510560,"Sequence number": 5516302, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18596 + } + }, + { + "ph": "s", "id": 385, "pid": 2338711, "tid": 2338711, "ts": 6274144756030.547, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144756035.871, "dur": 79.247, + "args": { + "External id": 510561,"Sequence number": 5516303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18597 + } + }, + { + "ph": "s", "id": 384, "pid": 2338711, "tid": 2338711, "ts": 6274144756035.871, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756118.365, "dur": 9.205, + "args": { + "External id": 510562,"Sequence number": 5516304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18598 + } + }, + { + "ph": "s", "id": 383, "pid": 2338711, "tid": 2338711, "ts": 6274144756118.365, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144756152.806, "dur": 3.802, + "args": { + "External id": 510563,"Sequence number": 5516305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756153.620, "dur": 2.892, + "args": { + "External id": 510564,"Sequence number": 5516305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18600 + } + }, + { + "ph": "s", "id": 382, "pid": 2338711, "tid": 2338711, "ts": 6274144756153.620, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144756164.172, "dur": 3.124, + "args": { + "External id": 510565,"Sequence number": 5516306, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756165.303, "dur": 1.873, + "args": { + "External id": 510566,"Sequence number": 5516306, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18602 + } + }, + { + "ph": "s", "id": 381, "pid": 2338711, "tid": 2338711, "ts": 6274144756165.303, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144756173.185, "dur": 5.322, + "args": { + "External id": 510567,"Sequence number": 5516307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756174.610, "dur": 3.720, + "args": { + "External id": 510568,"Sequence number": 5516307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18604 + } + }, + { + "ph": "s", "id": 380, "pid": 2338711, "tid": 2338711, "ts": 6274144756174.610, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144756217.030, "dur": 189.160, + "args": { + "External id": 510569,"Sequence number": 5516308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18605 + } + }, + { + "ph": "s", "id": 379, "pid": 2338711, "tid": 2338711, "ts": 6274144756217.030, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144756241.114, "dur": 10.559, + "args": { + "External id": 510570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144756244.688, "dur": 6.413, + "args": { + "External id": 510571,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144756420.034, "dur": 119.499, + "args": { + "External id": 510572,"Sequence number": 5516309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18608 + } + }, + { + "ph": "s", "id": 378, "pid": 2338711, "tid": 2338711, "ts": 6274144756420.034, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144756435.987, "dur": 8.197, + "args": { + "External id": 510573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144756438.654, "dur": 5.086, + "args": { + "External id": 510574,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274144756569.687, "dur": 217.852, + "args": { + "External id": 510575,"Sequence number": 5516310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18611 + } + }, + { + "ph": "s", "id": 377, "pid": 2338711, "tid": 2338711, "ts": 6274144756569.687, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144756617.789, "dur": 138.371, + "args": { + "External id": 510576,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144756672.834, "dur": 11.180, + "args": { + "External id": 510577,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144756675.558, "dur": 7.649, + "args": { + "External id": 510578,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144756686.902, "dur": 4.188, + "args": { + "External id": 510579,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144756692.240, "dur": 1.400, + "args": { + "External id": 510580,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144756696.110, "dur": 3.550, + "args": { + "External id": 510581,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274144756771.615, "dur": 5.649, + "args": { + "External id": 510582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144756793.184, "dur": 7.017, + "args": { + "External id": 510583,"Sequence number": 5516311, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756795.367, "dur": 4.626, + "args": { + "External id": 510584,"Sequence number": 5516311, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18620 + } + }, + { + "ph": "s", "id": 376, "pid": 2338711, "tid": 2338711, "ts": 6274144756795.367, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144756813.108, "dur": 127.981, + "args": { + "External id": 510585,"Sequence number": 5516312, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144756816.538, "dur": 9.997, + "args": { + "External id": 510586,"Sequence number": 5516312, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18622 + } + }, + { + "ph": "s", "id": 375, "pid": 2338711, "tid": 2338711, "ts": 6274144756816.538, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144756820.323, "dur": 5.147, + "args": { + "External id": 510587,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144756823.196, "dur": 1.960, + "args": { + "External id": 510588,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144756828.301, "dur": 112.541, + "args": { + "External id": 510589,"Sequence number": 5516313, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144756829.937, "dur": 8.638, + "args": { + "External id": 510590,"Sequence number": 5516313, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756833.299, "dur": 5.066, + "args": { + "External id": 510591,"Sequence number": 5516313, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18627 + } + }, + { + "ph": "s", "id": 374, "pid": 2338711, "tid": 2338711, "ts": 6274144756833.299, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144756839.691, "dur": 94.045, + "args": { + "External id": 510592,"Sequence number": 5516314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18628 + } + }, + { + "ph": "s", "id": 373, "pid": 2338711, "tid": 2338711, "ts": 6274144756839.691, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144756936.819, "dur": 3.151, + "args": { + "External id": 510593,"Sequence number": 5516315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18629 + } + }, + { + "ph": "s", "id": 372, "pid": 2338711, "tid": 2338711, "ts": 6274144756936.819, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144756975.828, "dur": 282.340, + "args": { + "External id": 510594,"Sequence number": 5516316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18630 + } + }, + { + "ph": "s", "id": 371, "pid": 2338711, "tid": 2338711, "ts": 6274144756975.828, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757000.642, "dur": 4.809, + "args": { + "External id": 510595,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757001.550, "dur": 3.676, + "args": { + "External id": 510596,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144757010.443, "dur": 5.774, + "args": { + "External id": 510597,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757011.691, "dur": 4.394, + "args": { + "External id": 510598,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757012.946, "dur": 3.045, + "args": { + "External id": 510599,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144757024.030, "dur": 8.982, + "args": { + "External id": 510600,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144757028.150, "dur": 4.500, + "args": { + "External id": 510601,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144757039.221, "dur": 2.754, + "args": { + "External id": 510602,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144757045.967, "dur": 3.155, + "args": { + "External id": 510603,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757231.039, "dur": 5.052, + "args": { + "External id": 510604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757232.413, "dur": 3.363, + "args": { + "External id": 510605,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757238.881, "dur": 4.940, + "args": { + "External id": 510606,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757242.580, "dur": 1.151, + "args": { + "External id": 510607,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144757278.732, "dur": 109.796, + "args": { + "External id": 510608,"Sequence number": 5516317, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144757280.078, "dur": 8.977, + "args": { + "External id": 510609,"Sequence number": 5516317, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18645 + } + }, + { + "ph": "s", "id": 370, "pid": 2338711, "tid": 2338711, "ts": 6274144757280.078, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144757283.246, "dur": 4.525, + "args": { + "External id": 510610,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144757285.617, "dur": 1.860, + "args": { + "External id": 510611,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144757290.219, "dur": 98.018, + "args": { + "External id": 510612,"Sequence number": 5516318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757294.380, "dur": 3.791, + "args": { + "External id": 510613,"Sequence number": 5516318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757295.075, "dur": 2.969, + "args": { + "External id": 510614,"Sequence number": 5516318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18650 + } + }, + { + "ph": "s", "id": 369, "pid": 2338711, "tid": 2338711, "ts": 6274144757295.075, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144757299.385, "dur": 80.998, + "args": { + "External id": 510615,"Sequence number": 5516319, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18651 + } + }, + { + "ph": "s", "id": 368, "pid": 2338711, "tid": 2338711, "ts": 6274144757299.385, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757382.729, "dur": 4.757, + "args": { + "External id": 510616,"Sequence number": 5516320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18652 + } + }, + { + "ph": "s", "id": 367, "pid": 2338711, "tid": 2338711, "ts": 6274144757382.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144757399.029, "dur": 76.332, + "args": { + "External id": 510617,"Sequence number": 5516321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144757399.926, "dur": 6.043, + "args": { + "External id": 510618,"Sequence number": 5516321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18654 + } + }, + { + "ph": "s", "id": 366, "pid": 2338711, "tid": 2338711, "ts": 6274144757399.926, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144757401.833, "dur": 2.993, + "args": { + "External id": 510619,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144757403.833, "dur": 0.813, + "args": { + "External id": 510620,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144757406.942, "dur": 68.220, + "args": { + "External id": 510621,"Sequence number": 5516322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757409.856, "dur": 5.124, + "args": { + "External id": 510622,"Sequence number": 5516322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757411.231, "dur": 3.629, + "args": { + "External id": 510623,"Sequence number": 5516322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18659 + } + }, + { + "ph": "s", "id": 365, "pid": 2338711, "tid": 2338711, "ts": 6274144757411.231, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144757415.768, "dur": 54.743, + "args": { + "External id": 510624,"Sequence number": 5516323, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18660 + } + }, + { + "ph": "s", "id": 364, "pid": 2338711, "tid": 2338711, "ts": 6274144757415.768, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757473.024, "dur": 1.785, + "args": { + "External id": 510625,"Sequence number": 5516324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18661 + } + }, + { + "ph": "s", "id": 363, "pid": 2338711, "tid": 2338711, "ts": 6274144757473.024, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144757500.817, "dur": 192.996, + "args": { + "External id": 510626,"Sequence number": 5516325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18662 + } + }, + { + "ph": "s", "id": 362, "pid": 2338711, "tid": 2338711, "ts": 6274144757500.817, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144757544.462, "dur": 4.599, + "args": { + "External id": 510627,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144757585.652, "dur": 92.767, + "args": { + "External id": 510628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144757604.827, "dur": 8.704, + "args": { + "External id": 510629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144757608.476, "dur": 3.984, + "args": { + "External id": 510630,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144757610.865, "dur": 1.168, + "args": { + "External id": 510631,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144757614.621, "dur": 63.387, + "args": { + "External id": 510632,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757618.324, "dur": 3.161, + "args": { + "External id": 510633,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757619.721, "dur": 1.475, + "args": { + "External id": 510634,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144757622.231, "dur": 51.497, + "args": { + "External id": 510635,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757675.932, "dur": 1.287, + "args": { + "External id": 510636,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274144757704.657, "dur": 29.038, + "args": { + "External id": 510637,"Sequence number": 5516326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18673 + } + }, + { + "ph": "s", "id": 361, "pid": 2338711, "tid": 2338711, "ts": 6274144757704.657, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144757772.142, "dur": 192.522, + "args": { + "External id": 510638,"Sequence number": 5516327, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18674 + } + }, + { + "ph": "s", "id": 360, "pid": 2338711, "tid": 2338711, "ts": 6274144757772.142, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757797.221, "dur": 3.503, + "args": { + "External id": 510639,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757798.501, "dur": 1.985, + "args": { + "External id": 510640,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144757809.538, "dur": 7.723, + "args": { + "External id": 510641,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144757812.749, "dur": 4.145, + "args": { + "External id": 510642,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144757823.437, "dur": 3.642, + "args": { + "External id": 510643,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757950.199, "dur": 3.111, + "args": { + "External id": 510644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757951.453, "dur": 1.592, + "args": { + "External id": 510645,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144757982.864, "dur": 131.233, + "args": { + "External id": 510646,"Sequence number": 5516328, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144757986.354, "dur": 6.778, + "args": { + "External id": 510647,"Sequence number": 5516328, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18683 + } + }, + { + "ph": "s", "id": 359, "pid": 2338711, "tid": 2338711, "ts": 6274144757986.354, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144757988.424, "dur": 3.577, + "args": { + "External id": 510648,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144757990.248, "dur": 1.565, + "args": { + "External id": 510649,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144757994.707, "dur": 119.089, + "args": { + "External id": 510650,"Sequence number": 5516329, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144757996.329, "dur": 9.107, + "args": { + "External id": 510651,"Sequence number": 5516329, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144757999.738, "dur": 5.548, + "args": { + "External id": 510652,"Sequence number": 5516329, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18688 + } + }, + { + "ph": "s", "id": 358, "pid": 2338711, "tid": 2338711, "ts": 6274144757999.738, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144758006.458, "dur": 97.540, + "args": { + "External id": 510653,"Sequence number": 5516330, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18689 + } + }, + { + "ph": "s", "id": 357, "pid": 2338711, "tid": 2338711, "ts": 6274144758006.458, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758108.121, "dur": 4.853, + "args": { + "External id": 510654,"Sequence number": 5516331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18690 + } + }, + { + "ph": "s", "id": 356, "pid": 2338711, "tid": 2338711, "ts": 6274144758108.121, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144758124.916, "dur": 75.684, + "args": { + "External id": 510655,"Sequence number": 5516332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144758125.890, "dur": 11.384, + "args": { + "External id": 510656,"Sequence number": 5516332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18692 + } + }, + { + "ph": "s", "id": 355, "pid": 2338711, "tid": 2338711, "ts": 6274144758125.890, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144758133.139, "dur": 2.754, + "args": { + "External id": 510657,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144758134.674, "dur": 1.053, + "args": { + "External id": 510658,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144758138.447, "dur": 61.759, + "args": { + "External id": 510659,"Sequence number": 5516333, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144758139.805, "dur": 5.860, + "args": { + "External id": 510660,"Sequence number": 5516333, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758143.200, "dur": 2.332, + "args": { + "External id": 510661,"Sequence number": 5516333, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18697 + } + }, + { + "ph": "s", "id": 354, "pid": 2338711, "tid": 2338711, "ts": 6274144758143.200, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144758146.301, "dur": 47.435, + "args": { + "External id": 510662,"Sequence number": 5516334, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18698 + } + }, + { + "ph": "s", "id": 353, "pid": 2338711, "tid": 2338711, "ts": 6274144758146.301, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758196.232, "dur": 3.292, + "args": { + "External id": 510663,"Sequence number": 5516335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18699 + } + }, + { + "ph": "s", "id": 352, "pid": 2338711, "tid": 2338711, "ts": 6274144758196.232, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144758207.067, "dur": 61.253, + "args": { + "External id": 510664,"Sequence number": 5516336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144758208.040, "dur": 8.205, + "args": { + "External id": 510665,"Sequence number": 5516336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18701 + } + }, + { + "ph": "s", "id": 351, "pid": 2338711, "tid": 2338711, "ts": 6274144758208.040, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144758209.882, "dur": 5.120, + "args": { + "External id": 510666,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144758214.152, "dur": 0.722, + "args": { + "External id": 510667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144758216.945, "dur": 51.087, + "args": { + "External id": 510668,"Sequence number": 5516337, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144758217.965, "dur": 8.237, + "args": { + "External id": 510669,"Sequence number": 5516337, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758218.936, "dur": 7.122, + "args": { + "External id": 510670,"Sequence number": 5516337, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18706 + } + }, + { + "ph": "s", "id": 350, "pid": 2338711, "tid": 2338711, "ts": 6274144758218.936, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144758228.586, "dur": 35.600, + "args": { + "External id": 510671,"Sequence number": 5516338, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18707 + } + }, + { + "ph": "s", "id": 349, "pid": 2338711, "tid": 2338711, "ts": 6274144758228.586, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758266.053, "dur": 1.647, + "args": { + "External id": 510672,"Sequence number": 5516339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18708 + } + }, + { + "ph": "s", "id": 348, "pid": 2338711, "tid": 2338711, "ts": 6274144758266.053, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144758286.346, "dur": 5.756, + "args": { + "External id": 510673,"Sequence number": 5516340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758287.502, "dur": 4.401, + "args": { + "External id": 510674,"Sequence number": 5516340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18710 + } + }, + { + "ph": "s", "id": 347, "pid": 2338711, "tid": 2338711, "ts": 6274144758287.502, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144758299.745, "dur": 7.691, + "args": { + "External id": 510675,"Sequence number": 5516341, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758303.087, "dur": 4.194, + "args": { + "External id": 510676,"Sequence number": 5516341, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18712 + } + }, + { + "ph": "s", "id": 346, "pid": 2338711, "tid": 2338711, "ts": 6274144758303.087, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144758312.076, "dur": 2.925, + "args": { + "External id": 510677,"Sequence number": 5516342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758313.193, "dur": 1.699, + "args": { + "External id": 510678,"Sequence number": 5516342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18714 + } + }, + { + "ph": "s", "id": 345, "pid": 2338711, "tid": 2338711, "ts": 6274144758313.193, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144758344.278, "dur": 155.923, + "args": { + "External id": 510679,"Sequence number": 5516343, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18715 + } + }, + { + "ph": "s", "id": 344, "pid": 2338711, "tid": 2338711, "ts": 6274144758344.278, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144758364.089, "dur": 13.793, + "args": { + "External id": 510680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144758371.602, "dur": 5.843, + "args": { + "External id": 510681,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144758514.512, "dur": 126.228, + "args": { + "External id": 510682,"Sequence number": 5516344, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18718 + } + }, + { + "ph": "s", "id": 343, "pid": 2338711, "tid": 2338711, "ts": 6274144758514.512, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144758528.167, "dur": 7.574, + "args": { + "External id": 510683,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144758531.048, "dur": 4.420, + "args": { + "External id": 510684,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274144758670.584, "dur": 180.254, + "args": { + "External id": 510685,"Sequence number": 5516345, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18721 + } + }, + { + "ph": "s", "id": 342, "pid": 2338711, "tid": 2338711, "ts": 6274144758670.584, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144758701.602, "dur": 123.612, + "args": { + "External id": 510686,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144758746.137, "dur": 9.477, + "args": { + "External id": 510687,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144758750.548, "dur": 4.655, + "args": { + "External id": 510688,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144758759.069, "dur": 3.692, + "args": { + "External id": 510689,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144758763.894, "dur": 1.111, + "args": { + "External id": 510690,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144758767.748, "dur": 3.869, + "args": { + "External id": 510691,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274144758837.718, "dur": 5.058, + "args": { + "External id": 510692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144758856.118, "dur": 9.099, + "args": { + "External id": 510693,"Sequence number": 5516346, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758858.311, "dur": 6.790, + "args": { + "External id": 510694,"Sequence number": 5516346, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18730 + } + }, + { + "ph": "s", "id": 341, "pid": 2338711, "tid": 2338711, "ts": 6274144758858.311, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144758875.961, "dur": 108.300, + "args": { + "External id": 510695,"Sequence number": 5516347, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144758877.298, "dur": 9.068, + "args": { + "External id": 510696,"Sequence number": 5516347, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18732 + } + }, + { + "ph": "s", "id": 340, "pid": 2338711, "tid": 2338711, "ts": 6274144758877.298, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144758880.337, "dur": 5.112, + "args": { + "External id": 510697,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144758883.412, "dur": 1.746, + "args": { + "External id": 510698,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144758887.923, "dur": 96.035, + "args": { + "External id": 510699,"Sequence number": 5516348, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144758892.063, "dur": 3.567, + "args": { + "External id": 510700,"Sequence number": 5516348, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758893.270, "dur": 2.181, + "args": { + "External id": 510701,"Sequence number": 5516348, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18737 + } + }, + { + "ph": "s", "id": 339, "pid": 2338711, "tid": 2338711, "ts": 6274144758893.270, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144758896.741, "dur": 77.443, + "args": { + "External id": 510702,"Sequence number": 5516349, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18738 + } + }, + { + "ph": "s", "id": 338, "pid": 2338711, "tid": 2338711, "ts": 6274144758896.741, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144758977.016, "dur": 6.310, + "args": { + "External id": 510703,"Sequence number": 5516350, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18739 + } + }, + { + "ph": "s", "id": 337, "pid": 2338711, "tid": 2338711, "ts": 6274144758977.016, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144759018.652, "dur": 261.377, + "args": { + "External id": 510704,"Sequence number": 5516351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18740 + } + }, + { + "ph": "s", "id": 336, "pid": 2338711, "tid": 2338711, "ts": 6274144759018.652, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759038.450, "dur": 4.261, + "args": { + "External id": 510705,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759041.293, "dur": 1.259, + "args": { + "External id": 510706,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144759046.639, "dur": 5.269, + "args": { + "External id": 510707,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759047.866, "dur": 3.939, + "args": { + "External id": 510708,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759050.894, "dur": 0.808, + "args": { + "External id": 510709,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144759095.154, "dur": 8.803, + "args": { + "External id": 510710,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144759097.270, "dur": 6.205, + "args": { + "External id": 510711,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144759110.862, "dur": 3.478, + "args": { + "External id": 510712,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144759118.309, "dur": 3.309, + "args": { + "External id": 510713,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759256.303, "dur": 5.025, + "args": { + "External id": 510714,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759257.836, "dur": 3.338, + "args": { + "External id": 510715,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759263.990, "dur": 2.403, + "args": { + "External id": 510716,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759265.381, "dur": 0.865, + "args": { + "External id": 510717,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144759297.968, "dur": 104.457, + "args": { + "External id": 510718,"Sequence number": 5516352, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144759298.973, "dur": 10.129, + "args": { + "External id": 510719,"Sequence number": 5516352, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18755 + } + }, + { + "ph": "s", "id": 335, "pid": 2338711, "tid": 2338711, "ts": 6274144759298.973, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144759301.706, "dur": 6.224, + "args": { + "External id": 510720,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144759306.158, "dur": 1.548, + "args": { + "External id": 510721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144759310.194, "dur": 91.935, + "args": { + "External id": 510722,"Sequence number": 5516353, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759312.095, "dur": 5.642, + "args": { + "External id": 510723,"Sequence number": 5516353, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759312.781, "dur": 4.840, + "args": { + "External id": 510724,"Sequence number": 5516353, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18760 + } + }, + { + "ph": "s", "id": 334, "pid": 2338711, "tid": 2338711, "ts": 6274144759312.781, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144759318.613, "dur": 77.689, + "args": { + "External id": 510725,"Sequence number": 5516354, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18761 + } + }, + { + "ph": "s", "id": 333, "pid": 2338711, "tid": 2338711, "ts": 6274144759318.613, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759398.684, "dur": 2.992, + "args": { + "External id": 510726,"Sequence number": 5516355, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18762 + } + }, + { + "ph": "s", "id": 332, "pid": 2338711, "tid": 2338711, "ts": 6274144759398.684, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144759410.900, "dur": 68.342, + "args": { + "External id": 510727,"Sequence number": 5516356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144759411.578, "dur": 6.304, + "args": { + "External id": 510728,"Sequence number": 5516356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18764 + } + }, + { + "ph": "s", "id": 331, "pid": 2338711, "tid": 2338711, "ts": 6274144759411.578, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144759413.822, "dur": 2.880, + "args": { + "External id": 510729,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144759415.716, "dur": 0.799, + "args": { + "External id": 510730,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144759421.003, "dur": 57.996, + "args": { + "External id": 510731,"Sequence number": 5516357, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759421.987, "dur": 6.176, + "args": { + "External id": 510732,"Sequence number": 5516357, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759423.169, "dur": 4.833, + "args": { + "External id": 510733,"Sequence number": 5516357, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18769 + } + }, + { + "ph": "s", "id": 330, "pid": 2338711, "tid": 2338711, "ts": 6274144759423.169, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144759428.713, "dur": 44.150, + "args": { + "External id": 510734,"Sequence number": 5516358, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18770 + } + }, + { + "ph": "s", "id": 329, "pid": 2338711, "tid": 2338711, "ts": 6274144759428.713, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759474.957, "dur": 3.726, + "args": { + "External id": 510735,"Sequence number": 5516359, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18771 + } + }, + { + "ph": "s", "id": 328, "pid": 2338711, "tid": 2338711, "ts": 6274144759474.957, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144759499.433, "dur": 177.105, + "args": { + "External id": 510736,"Sequence number": 5516360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18772 + } + }, + { + "ph": "s", "id": 327, "pid": 2338711, "tid": 2338711, "ts": 6274144759499.433, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144759536.704, "dur": 4.596, + "args": { + "External id": 510737,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144759574.503, "dur": 86.979, + "args": { + "External id": 510738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144759575.622, "dur": 7.008, + "args": { + "External id": 510739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144759576.997, "dur": 4.700, + "args": { + "External id": 510740,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144759580.933, "dur": 0.586, + "args": { + "External id": 510741,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144759583.493, "dur": 77.734, + "args": { + "External id": 510742,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759584.744, "dur": 23.414, + "args": { + "External id": 510743,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759603.665, "dur": 4.176, + "args": { + "External id": 510744,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144759609.056, "dur": 47.872, + "args": { + "External id": 510745,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759659.140, "dur": 1.351, + "args": { + "External id": 510746,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274144759686.590, "dur": 24.343, + "args": { + "External id": 510747,"Sequence number": 5516361, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18783 + } + }, + { + "ph": "s", "id": 326, "pid": 2338711, "tid": 2338711, "ts": 6274144759686.590, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144759749.202, "dur": 191.414, + "args": { + "External id": 510748,"Sequence number": 5516362, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18784 + } + }, + { + "ph": "s", "id": 325, "pid": 2338711, "tid": 2338711, "ts": 6274144759749.202, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759769.290, "dur": 3.273, + "args": { + "External id": 510749,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759770.252, "dur": 2.162, + "args": { + "External id": 510750,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144759780.747, "dur": 7.546, + "args": { + "External id": 510751,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144759784.121, "dur": 3.743, + "args": { + "External id": 510752,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144759794.384, "dur": 3.681, + "args": { + "External id": 510753,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759925.583, "dur": 3.697, + "args": { + "External id": 510754,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759926.993, "dur": 2.033, + "args": { + "External id": 510755,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144759957.930, "dur": 84.433, + "args": { + "External id": 510756,"Sequence number": 5516363, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144759959.285, "dur": 6.935, + "args": { + "External id": 510757,"Sequence number": 5516363, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18793 + } + }, + { + "ph": "s", "id": 324, "pid": 2338711, "tid": 2338711, "ts": 6274144759959.285, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144759961.419, "dur": 3.645, + "args": { + "External id": 510758,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144759963.491, "dur": 1.397, + "args": { + "External id": 510759,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144759967.247, "dur": 74.796, + "args": { + "External id": 510760,"Sequence number": 5516364, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144759971.276, "dur": 6.804, + "args": { + "External id": 510761,"Sequence number": 5516364, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144759972.547, "dur": 5.386, + "args": { + "External id": 510762,"Sequence number": 5516364, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18798 + } + }, + { + "ph": "s", "id": 323, "pid": 2338711, "tid": 2338711, "ts": 6274144759972.547, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144759978.800, "dur": 56.191, + "args": { + "External id": 510763,"Sequence number": 5516365, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18799 + } + }, + { + "ph": "s", "id": 322, "pid": 2338711, "tid": 2338711, "ts": 6274144759978.800, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760037.207, "dur": 4.289, + "args": { + "External id": 510764,"Sequence number": 5516366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18800 + } + }, + { + "ph": "s", "id": 321, "pid": 2338711, "tid": 2338711, "ts": 6274144760037.207, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144760092.451, "dur": 71.271, + "args": { + "External id": 510765,"Sequence number": 5516367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144760093.907, "dur": 8.034, + "args": { + "External id": 510766,"Sequence number": 5516367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18802 + } + }, + { + "ph": "s", "id": 320, "pid": 2338711, "tid": 2338711, "ts": 6274144760093.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144760096.815, "dur": 3.425, + "args": { + "External id": 510767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144760098.749, "dur": 1.219, + "args": { + "External id": 510768,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144760102.703, "dur": 60.746, + "args": { + "External id": 510769,"Sequence number": 5516368, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144760106.036, "dur": 5.432, + "args": { + "External id": 510770,"Sequence number": 5516368, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760107.044, "dur": 4.264, + "args": { + "External id": 510771,"Sequence number": 5516368, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18807 + } + }, + { + "ph": "s", "id": 319, "pid": 2338711, "tid": 2338711, "ts": 6274144760107.044, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144760112.028, "dur": 46.810, + "args": { + "External id": 510772,"Sequence number": 5516369, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18808 + } + }, + { + "ph": "s", "id": 318, "pid": 2338711, "tid": 2338711, "ts": 6274144760112.028, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760160.369, "dur": 2.575, + "args": { + "External id": 510773,"Sequence number": 5516370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18809 + } + }, + { + "ph": "s", "id": 317, "pid": 2338711, "tid": 2338711, "ts": 6274144760160.369, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144760172.595, "dur": 62.967, + "args": { + "External id": 510774,"Sequence number": 5516371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144760173.343, "dur": 8.718, + "args": { + "External id": 510775,"Sequence number": 5516371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18811 + } + }, + { + "ph": "s", "id": 316, "pid": 2338711, "tid": 2338711, "ts": 6274144760173.343, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144760177.103, "dur": 3.621, + "args": { + "External id": 510776,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144760179.905, "dur": 0.690, + "args": { + "External id": 510777,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144760182.698, "dur": 52.636, + "args": { + "External id": 510778,"Sequence number": 5516372, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144760184.030, "dur": 7.079, + "args": { + "External id": 510779,"Sequence number": 5516372, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760186.863, "dur": 4.021, + "args": { + "External id": 510780,"Sequence number": 5516372, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18816 + } + }, + { + "ph": "s", "id": 315, "pid": 2338711, "tid": 2338711, "ts": 6274144760186.863, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144760191.629, "dur": 35.947, + "args": { + "External id": 510781,"Sequence number": 5516373, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18817 + } + }, + { + "ph": "s", "id": 314, "pid": 2338711, "tid": 2338711, "ts": 6274144760191.629, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760229.220, "dur": 5.414, + "args": { + "External id": 510782,"Sequence number": 5516374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18818 + } + }, + { + "ph": "s", "id": 313, "pid": 2338711, "tid": 2338711, "ts": 6274144760229.220, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144760254.362, "dur": 6.054, + "args": { + "External id": 510783,"Sequence number": 5516375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760255.529, "dur": 4.742, + "args": { + "External id": 510784,"Sequence number": 5516375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18820 + } + }, + { + "ph": "s", "id": 312, "pid": 2338711, "tid": 2338711, "ts": 6274144760255.529, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144760267.820, "dur": 3.494, + "args": { + "External id": 510785,"Sequence number": 5516376, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760269.036, "dur": 2.171, + "args": { + "External id": 510786,"Sequence number": 5516376, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18822 + } + }, + { + "ph": "s", "id": 311, "pid": 2338711, "tid": 2338711, "ts": 6274144760269.036, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144760275.585, "dur": 3.066, + "args": { + "External id": 510787,"Sequence number": 5516377, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760276.445, "dur": 2.106, + "args": { + "External id": 510788,"Sequence number": 5516377, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18824 + } + }, + { + "ph": "s", "id": 310, "pid": 2338711, "tid": 2338711, "ts": 6274144760276.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144760309.541, "dur": 160.643, + "args": { + "External id": 510789,"Sequence number": 5516378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18825 + } + }, + { + "ph": "s", "id": 309, "pid": 2338711, "tid": 2338711, "ts": 6274144760309.541, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144760329.701, "dur": 9.329, + "args": { + "External id": 510790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144760332.920, "dur": 5.650, + "args": { + "External id": 510791,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144760482.716, "dur": 122.620, + "args": { + "External id": 510792,"Sequence number": 5516379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18828 + } + }, + { + "ph": "s", "id": 308, "pid": 2338711, "tid": 2338711, "ts": 6274144760482.716, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144760495.630, "dur": 7.271, + "args": { + "External id": 510793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144760497.971, "dur": 4.492, + "args": { + "External id": 510794,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274144760638.075, "dur": 187.125, + "args": { + "External id": 510795,"Sequence number": 5516380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18831 + } + }, + { + "ph": "s", "id": 307, "pid": 2338711, "tid": 2338711, "ts": 6274144760638.075, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144760665.392, "dur": 133.132, + "args": { + "External id": 510796,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144760714.277, "dur": 10.309, + "args": { + "External id": 510797,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144760717.095, "dur": 7.055, + "args": { + "External id": 510798,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144760727.387, "dur": 4.318, + "args": { + "External id": 510799,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144760732.719, "dur": 1.315, + "args": { + "External id": 510800,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144760737.140, "dur": 5.019, + "args": { + "External id": 510801,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274144760811.380, "dur": 5.147, + "args": { + "External id": 510802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144760830.372, "dur": 6.133, + "args": { + "External id": 510803,"Sequence number": 5516381, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760831.819, "dur": 4.555, + "args": { + "External id": 510804,"Sequence number": 5516381, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18840 + } + }, + { + "ph": "s", "id": 306, "pid": 2338711, "tid": 2338711, "ts": 6274144760831.819, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144760847.799, "dur": 109.740, + "args": { + "External id": 510805,"Sequence number": 5516382, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144760848.985, "dur": 13.291, + "args": { + "External id": 510806,"Sequence number": 5516382, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18842 + } + }, + { + "ph": "s", "id": 305, "pid": 2338711, "tid": 2338711, "ts": 6274144760848.985, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144760854.529, "dur": 6.790, + "args": { + "External id": 510807,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144760859.415, "dur": 1.663, + "args": { + "External id": 510808,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144760863.625, "dur": 93.566, + "args": { + "External id": 510809,"Sequence number": 5516383, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144760865.285, "dur": 3.463, + "args": { + "External id": 510810,"Sequence number": 5516383, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760866.441, "dur": 2.165, + "args": { + "External id": 510811,"Sequence number": 5516383, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18847 + } + }, + { + "ph": "s", "id": 304, "pid": 2338711, "tid": 2338711, "ts": 6274144760866.441, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144760869.520, "dur": 80.788, + "args": { + "External id": 510812,"Sequence number": 5516384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18848 + } + }, + { + "ph": "s", "id": 303, "pid": 2338711, "tid": 2338711, "ts": 6274144760869.520, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144760952.901, "dur": 3.661, + "args": { + "External id": 510813,"Sequence number": 5516385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18849 + } + }, + { + "ph": "s", "id": 302, "pid": 2338711, "tid": 2338711, "ts": 6274144760952.901, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144760990.882, "dur": 266.570, + "args": { + "External id": 510814,"Sequence number": 5516386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18850 + } + }, + { + "ph": "s", "id": 301, "pid": 2338711, "tid": 2338711, "ts": 6274144760990.882, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761009.336, "dur": 2.692, + "args": { + "External id": 510815,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761010.481, "dur": 1.389, + "args": { + "External id": 510816,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144761016.008, "dur": 8.757, + "args": { + "External id": 510817,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761022.568, "dur": 2.060, + "args": { + "External id": 510818,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761023.339, "dur": 1.176, + "args": { + "External id": 510819,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144761031.595, "dur": 6.289, + "args": { + "External id": 510820,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144761033.255, "dur": 4.379, + "args": { + "External id": 510821,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144761043.689, "dur": 5.201, + "args": { + "External id": 510822,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144761054.961, "dur": 41.438, + "args": { + "External id": 510823,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761234.424, "dur": 4.308, + "args": { + "External id": 510824,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761235.568, "dur": 2.977, + "args": { + "External id": 510825,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761241.607, "dur": 2.970, + "args": { + "External id": 510826,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761242.896, "dur": 1.599, + "args": { + "External id": 510827,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144761276.186, "dur": 105.159, + "args": { + "External id": 510828,"Sequence number": 5516387, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144761277.354, "dur": 12.857, + "args": { + "External id": 510829,"Sequence number": 5516387, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18865 + } + }, + { + "ph": "s", "id": 300, "pid": 2338711, "tid": 2338711, "ts": 6274144761277.354, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144761282.767, "dur": 5.990, + "args": { + "External id": 510830,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144761285.091, "dur": 3.384, + "args": { + "External id": 510831,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144761291.401, "dur": 89.630, + "args": { + "External id": 510832,"Sequence number": 5516388, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761293.255, "dur": 4.878, + "args": { + "External id": 510833,"Sequence number": 5516388, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761294.286, "dur": 3.538, + "args": { + "External id": 510834,"Sequence number": 5516388, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18870 + } + }, + { + "ph": "s", "id": 299, "pid": 2338711, "tid": 2338711, "ts": 6274144761294.286, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144761299.342, "dur": 73.559, + "args": { + "External id": 510835,"Sequence number": 5516389, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18871 + } + }, + { + "ph": "s", "id": 298, "pid": 2338711, "tid": 2338711, "ts": 6274144761299.342, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761375.117, "dur": 5.226, + "args": { + "External id": 510836,"Sequence number": 5516390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18872 + } + }, + { + "ph": "s", "id": 297, "pid": 2338711, "tid": 2338711, "ts": 6274144761375.117, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144761389.538, "dur": 71.993, + "args": { + "External id": 510837,"Sequence number": 5516391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144761390.119, "dur": 7.956, + "args": { + "External id": 510838,"Sequence number": 5516391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18874 + } + }, + { + "ph": "s", "id": 296, "pid": 2338711, "tid": 2338711, "ts": 6274144761390.119, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144761392.164, "dur": 4.701, + "args": { + "External id": 510839,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144761395.984, "dur": 0.752, + "args": { + "External id": 510840,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144761398.911, "dur": 62.439, + "args": { + "External id": 510841,"Sequence number": 5516392, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761400.179, "dur": 5.825, + "args": { + "External id": 510842,"Sequence number": 5516392, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761401.252, "dur": 4.611, + "args": { + "External id": 510843,"Sequence number": 5516392, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18879 + } + }, + { + "ph": "s", "id": 295, "pid": 2338711, "tid": 2338711, "ts": 6274144761401.252, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144761408.728, "dur": 45.447, + "args": { + "External id": 510844,"Sequence number": 5516393, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18880 + } + }, + { + "ph": "s", "id": 294, "pid": 2338711, "tid": 2338711, "ts": 6274144761408.728, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761456.132, "dur": 4.397, + "args": { + "External id": 510845,"Sequence number": 5516394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18881 + } + }, + { + "ph": "s", "id": 293, "pid": 2338711, "tid": 2338711, "ts": 6274144761456.132, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144761482.828, "dur": 171.331, + "args": { + "External id": 510846,"Sequence number": 5516395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18882 + } + }, + { + "ph": "s", "id": 292, "pid": 2338711, "tid": 2338711, "ts": 6274144761482.828, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144761519.947, "dur": 4.886, + "args": { + "External id": 510847,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144761558.406, "dur": 82.629, + "args": { + "External id": 510848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144761559.118, "dur": 6.369, + "args": { + "External id": 510849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144761560.337, "dur": 4.407, + "args": { + "External id": 510850,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144761562.421, "dur": 2.132, + "args": { + "External id": 510851,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144761566.460, "dur": 74.244, + "args": { + "External id": 510852,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761568.166, "dur": 2.191, + "args": { + "External id": 510853,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761569.093, "dur": 1.166, + "args": { + "External id": 510854,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144761573.602, "dur": 61.518, + "args": { + "External id": 510855,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761638.655, "dur": 1.325, + "args": { + "External id": 510856,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274144761663.669, "dur": 27.869, + "args": { + "External id": 510857,"Sequence number": 5516396, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18893 + } + }, + { + "ph": "s", "id": 291, "pid": 2338711, "tid": 2338711, "ts": 6274144761663.669, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144761727.280, "dur": 178.849, + "args": { + "External id": 510858,"Sequence number": 5516397, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18894 + } + }, + { + "ph": "s", "id": 290, "pid": 2338711, "tid": 2338711, "ts": 6274144761727.280, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761745.701, "dur": 5.045, + "args": { + "External id": 510859,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761747.216, "dur": 3.392, + "args": { + "External id": 510860,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144761759.013, "dur": 9.570, + "args": { + "External id": 510861,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144761763.957, "dur": 4.098, + "args": { + "External id": 510862,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144761774.608, "dur": 3.720, + "args": { + "External id": 510863,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761892.162, "dur": 5.228, + "args": { + "External id": 510864,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761893.305, "dur": 3.732, + "args": { + "External id": 510865,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144761923.942, "dur": 89.428, + "args": { + "External id": 510866,"Sequence number": 5516398, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144761925.112, "dur": 10.339, + "args": { + "External id": 510867,"Sequence number": 5516398, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18903 + } + }, + { + "ph": "s", "id": 289, "pid": 2338711, "tid": 2338711, "ts": 6274144761925.112, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144761927.706, "dur": 6.213, + "args": { + "External id": 510868,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144761932.510, "dur": 1.223, + "args": { + "External id": 510869,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144761936.289, "dur": 76.821, + "args": { + "External id": 510870,"Sequence number": 5516399, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144761937.573, "dur": 6.514, + "args": { + "External id": 510871,"Sequence number": 5516399, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144761938.711, "dur": 5.225, + "args": { + "External id": 510872,"Sequence number": 5516399, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18908 + } + }, + { + "ph": "s", "id": 288, "pid": 2338711, "tid": 2338711, "ts": 6274144761938.711, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144761944.946, "dur": 62.683, + "args": { + "External id": 510873,"Sequence number": 5516400, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18909 + } + }, + { + "ph": "s", "id": 287, "pid": 2338711, "tid": 2338711, "ts": 6274144761944.946, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762009.683, "dur": 2.939, + "args": { + "External id": 510874,"Sequence number": 5516401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18910 + } + }, + { + "ph": "s", "id": 286, "pid": 2338711, "tid": 2338711, "ts": 6274144762009.683, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144762021.488, "dur": 97.893, + "args": { + "External id": 510875,"Sequence number": 5516402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144762022.003, "dur": 6.610, + "args": { + "External id": 510876,"Sequence number": 5516402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18912 + } + }, + { + "ph": "s", "id": 285, "pid": 2338711, "tid": 2338711, "ts": 6274144762022.003, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144762023.853, "dur": 3.725, + "args": { + "External id": 510877,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144762026.623, "dur": 0.694, + "args": { + "External id": 510878,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144762031.643, "dur": 87.415, + "args": { + "External id": 510879,"Sequence number": 5516403, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762033.197, "dur": 3.170, + "args": { + "External id": 510880,"Sequence number": 5516403, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762034.067, "dur": 2.195, + "args": { + "External id": 510881,"Sequence number": 5516403, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18917 + } + }, + { + "ph": "s", "id": 284, "pid": 2338711, "tid": 2338711, "ts": 6274144762034.067, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144762037.069, "dur": 73.455, + "args": { + "External id": 510882,"Sequence number": 5516404, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18918 + } + }, + { + "ph": "s", "id": 283, "pid": 2338711, "tid": 2338711, "ts": 6274144762037.069, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762113.883, "dur": 4.699, + "args": { + "External id": 510883,"Sequence number": 5516405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18919 + } + }, + { + "ph": "s", "id": 282, "pid": 2338711, "tid": 2338711, "ts": 6274144762113.883, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144762131.418, "dur": 65.306, + "args": { + "External id": 510884,"Sequence number": 5516406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144762132.190, "dur": 5.843, + "args": { + "External id": 510885,"Sequence number": 5516406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18921 + } + }, + { + "ph": "s", "id": 281, "pid": 2338711, "tid": 2338711, "ts": 6274144762132.190, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144762134.288, "dur": 2.409, + "args": { + "External id": 510886,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144762135.793, "dur": 0.783, + "args": { + "External id": 510887,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144762138.900, "dur": 57.630, + "args": { + "External id": 510888,"Sequence number": 5516407, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762142.493, "dur": 7.198, + "args": { + "External id": 510889,"Sequence number": 5516407, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762143.373, "dur": 6.194, + "args": { + "External id": 510890,"Sequence number": 5516407, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18926 + } + }, + { + "ph": "s", "id": 280, "pid": 2338711, "tid": 2338711, "ts": 6274144762143.373, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144762150.445, "dur": 41.521, + "args": { + "External id": 510891,"Sequence number": 5516408, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18927 + } + }, + { + "ph": "s", "id": 279, "pid": 2338711, "tid": 2338711, "ts": 6274144762150.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762193.678, "dur": 2.502, + "args": { + "External id": 510892,"Sequence number": 5516409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18928 + } + }, + { + "ph": "s", "id": 278, "pid": 2338711, "tid": 2338711, "ts": 6274144762193.678, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762215.895, "dur": 3.814, + "args": { + "External id": 510893,"Sequence number": 5516410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762216.700, "dur": 2.841, + "args": { + "External id": 510894,"Sequence number": 5516410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18930 + } + }, + { + "ph": "s", "id": 277, "pid": 2338711, "tid": 2338711, "ts": 6274144762216.700, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762227.106, "dur": 5.426, + "args": { + "External id": 510895,"Sequence number": 5516411, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762228.498, "dur": 3.893, + "args": { + "External id": 510896,"Sequence number": 5516411, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18932 + } + }, + { + "ph": "s", "id": 276, "pid": 2338711, "tid": 2338711, "ts": 6274144762228.498, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762236.482, "dur": 4.774, + "args": { + "External id": 510897,"Sequence number": 5516412, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762237.612, "dur": 3.426, + "args": { + "External id": 510898,"Sequence number": 5516412, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18934 + } + }, + { + "ph": "s", "id": 275, "pid": 2338711, "tid": 2338711, "ts": 6274144762237.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144762270.840, "dur": 158.836, + "args": { + "External id": 510899,"Sequence number": 5516413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18935 + } + }, + { + "ph": "s", "id": 274, "pid": 2338711, "tid": 2338711, "ts": 6274144762270.840, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144762290.345, "dur": 8.781, + "args": { + "External id": 510900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144762293.059, "dur": 5.658, + "args": { + "External id": 510901,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144762441.929, "dur": 100.128, + "args": { + "External id": 510902,"Sequence number": 5516414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18938 + } + }, + { + "ph": "s", "id": 273, "pid": 2338711, "tid": 2338711, "ts": 6274144762441.929, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144762454.550, "dur": 7.392, + "args": { + "External id": 510903,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144762457.249, "dur": 4.323, + "args": { + "External id": 510904,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338711, "tid": 2338711, + "ts": 6274144762570.937, "dur": 205.636, + "args": { + "External id": 510905,"Sequence number": 5516415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18941 + } + }, + { + "ph": "s", "id": 272, "pid": 2338711, "tid": 2338711, "ts": 6274144762570.937, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274144762612.828, "dur": 132.108, + "args": { + "External id": 510906,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144762665.976, "dur": 8.402, + "args": { + "External id": 510907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144762669.201, "dur": 4.636, + "args": { + "External id": 510908,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144762677.238, "dur": 3.809, + "args": { + "External id": 510909,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144762685.772, "dur": 0.890, + "args": { + "External id": 510910,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144762689.084, "dur": 3.358, + "args": { + "External id": 510911,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338711, "tid": 2338711, + "ts": 6274144762758.089, "dur": 5.252, + "args": { + "External id": 510912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762781.803, "dur": 6.601, + "args": { + "External id": 510913,"Sequence number": 5516416, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762783.318, "dur": 4.965, + "args": { + "External id": 510914,"Sequence number": 5516416, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18950 + } + }, + { + "ph": "s", "id": 271, "pid": 2338711, "tid": 2338711, "ts": 6274144762783.318, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144762799.936, "dur": 111.921, + "args": { + "External id": 510915,"Sequence number": 5516417, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144762801.280, "dur": 10.658, + "args": { + "External id": 510916,"Sequence number": 5516417, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18952 + } + }, + { + "ph": "s", "id": 270, "pid": 2338711, "tid": 2338711, "ts": 6274144762801.280, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144762804.069, "dur": 6.848, + "args": { + "External id": 510917,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144762808.814, "dur": 1.833, + "args": { + "External id": 510918,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144762813.367, "dur": 98.178, + "args": { + "External id": 510919,"Sequence number": 5516418, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762815.384, "dur": 3.681, + "args": { + "External id": 510920,"Sequence number": 5516418, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762816.524, "dur": 2.405, + "args": { + "External id": 510921,"Sequence number": 5516418, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18957 + } + }, + { + "ph": "s", "id": 269, "pid": 2338711, "tid": 2338711, "ts": 6274144762816.524, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144762822.329, "dur": 81.643, + "args": { + "External id": 510922,"Sequence number": 5516419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18958 + } + }, + { + "ph": "s", "id": 268, "pid": 2338711, "tid": 2338711, "ts": 6274144762822.329, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762906.597, "dur": 4.306, + "args": { + "External id": 510923,"Sequence number": 5516420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18959 + } + }, + { + "ph": "s", "id": 267, "pid": 2338711, "tid": 2338711, "ts": 6274144762906.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144762950.031, "dur": 266.882, + "args": { + "External id": 510924,"Sequence number": 5516421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18960 + } + }, + { + "ph": "s", "id": 266, "pid": 2338711, "tid": 2338711, "ts": 6274144762950.031, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762968.481, "dur": 4.288, + "args": { + "External id": 510925,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762971.103, "dur": 1.515, + "args": { + "External id": 510926,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338711, "tid": 2338711, + "ts": 6274144762976.974, "dur": 3.393, + "args": { + "External id": 510927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144762978.112, "dur": 2.152, + "args": { + "External id": 510928,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144762979.020, "dur": 1.150, + "args": { + "External id": 510929,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144762987.478, "dur": 7.635, + "args": { + "External id": 510930,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144762989.908, "dur": 4.887, + "args": { + "External id": 510931,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144763003.158, "dur": 4.892, + "args": { + "External id": 510932,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144763013.467, "dur": 2.970, + "args": { + "External id": 510933,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144763191.507, "dur": 4.549, + "args": { + "External id": 510934,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763192.609, "dur": 3.064, + "args": { + "External id": 510935,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144763199.294, "dur": 2.976, + "args": { + "External id": 510936,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763200.821, "dur": 1.355, + "args": { + "External id": 510937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144763234.560, "dur": 105.893, + "args": { + "External id": 510938,"Sequence number": 5516422, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144763235.830, "dur": 9.910, + "args": { + "External id": 510939,"Sequence number": 5516422, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18975 + } + }, + { + "ph": "s", "id": 265, "pid": 2338711, "tid": 2338711, "ts": 6274144763235.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144763240.432, "dur": 4.091, + "args": { + "External id": 510940,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144763242.604, "dur": 1.651, + "args": { + "External id": 510941,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144763247.357, "dur": 92.670, + "args": { + "External id": 510942,"Sequence number": 5516423, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144763249.157, "dur": 5.852, + "args": { + "External id": 510943,"Sequence number": 5516423, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763252.153, "dur": 2.671, + "args": { + "External id": 510944,"Sequence number": 5516423, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18980 + } + }, + { + "ph": "s", "id": 264, "pid": 2338711, "tid": 2338711, "ts": 6274144763252.153, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144763256.051, "dur": 76.075, + "args": { + "External id": 510945,"Sequence number": 5516424, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18981 + } + }, + { + "ph": "s", "id": 263, "pid": 2338711, "tid": 2338711, "ts": 6274144763256.051, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763334.866, "dur": 4.536, + "args": { + "External id": 510946,"Sequence number": 5516425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18982 + } + }, + { + "ph": "s", "id": 262, "pid": 2338711, "tid": 2338711, "ts": 6274144763334.866, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144763348.302, "dur": 83.148, + "args": { + "External id": 510947,"Sequence number": 5516426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144763349.247, "dur": 10.378, + "args": { + "External id": 510948,"Sequence number": 5516426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18984 + } + }, + { + "ph": "s", "id": 261, "pid": 2338711, "tid": 2338711, "ts": 6274144763349.247, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144763355.519, "dur": 2.780, + "args": { + "External id": 510949,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144763357.193, "dur": 0.937, + "args": { + "External id": 510950,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144763360.425, "dur": 70.741, + "args": { + "External id": 510951,"Sequence number": 5516427, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144763361.527, "dur": 6.232, + "args": { + "External id": 510952,"Sequence number": 5516427, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763362.303, "dur": 5.318, + "args": { + "External id": 510953,"Sequence number": 5516427, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18989 + } + }, + { + "ph": "s", "id": 260, "pid": 2338711, "tid": 2338711, "ts": 6274144763362.303, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144763368.395, "dur": 54.812, + "args": { + "External id": 510954,"Sequence number": 5516428, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18990 + } + }, + { + "ph": "s", "id": 259, "pid": 2338711, "tid": 2338711, "ts": 6274144763368.395, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763425.394, "dur": 4.763, + "args": { + "External id": 510955,"Sequence number": 5516429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18991 + } + }, + { + "ph": "s", "id": 258, "pid": 2338711, "tid": 2338711, "ts": 6274144763425.394, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144763452.728, "dur": 175.638, + "args": { + "External id": 510956,"Sequence number": 5516430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18992 + } + }, + { + "ph": "s", "id": 257, "pid": 2338711, "tid": 2338711, "ts": 6274144763452.728, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144763494.818, "dur": 5.033, + "args": { + "External id": 510957,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144763530.282, "dur": 82.573, + "args": { + "External id": 510958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144763531.115, "dur": 6.803, + "args": { + "External id": 510959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144763532.418, "dur": 4.753, + "args": { + "External id": 510960,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144763536.283, "dur": 0.708, + "args": { + "External id": 510961,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144763538.662, "dur": 73.825, + "args": { + "External id": 510962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338711, "tid": 2338711, + "ts": 6274144763540.230, "dur": 4.363, + "args": { + "External id": 510963,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763543.156, "dur": 1.327, + "args": { + "External id": 510964,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 19000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144763545.287, "dur": 61.073, + "args": { + "External id": 510965,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 19001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763610.385, "dur": 1.279, + "args": { + "External id": 510966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274144763637.916, "dur": 26.229, + "args": { + "External id": 510967,"Sequence number": 5516431, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 19003 + } + }, + { + "ph": "s", "id": 256, "pid": 2338711, "tid": 2338711, "ts": 6274144763637.916, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338711, "tid": 2338711, + "ts": 6274144763683.911, "dur": 40.673, + "args": { + "External id": 510968,"Sequence number": 5516432, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 19004 + } + }, + { + "ph": "s", "id": 255, "pid": 2338711, "tid": 2338711, "ts": 6274144763683.911, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338711, "tid": 2338711, + "ts": 6274144763691.861, "dur": 28.087, + "args": { + "External id": 510969,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 19005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144763721.442, "dur": 1.265, + "args": { + "External id": 510970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 19006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338711, "tid": 2338711, + "ts": 6274144763760.464, "dur": 48.748, + "args": { + "External id": 510971,"Record function id": 0, "Ev Idx": 19007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338711, "tid": 2338711, + "ts": 6274144763810.395, "dur": 194.904, + "args": { + "External id": 510972,"Record function id": 0, "Ev Idx": 19008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144763850.011, "dur": 146.740, + "args": { + "External id": 510973,"Sequence number": 5516433, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 19009 + } + }, + { + "ph": "s", "id": 254, "pid": 2338711, "tid": 2338711, "ts": 6274144763850.011, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338711, "tid": 2338711, + "ts": 6274144763920.408, "dur": 38.841, + "args": { + "External id": 510974,"kernel_hash": "cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vb/cvb5q5vgi2ya3llutub7eh77teolhudusc7aq54l5vmhwf6ipesd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 19010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274144764134.450, "dur": 76.902, + "args": { + "External id": 510975,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144764137.154, "dur": 6.593, + "args": { + "External id": 510976,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764180.208, "dur": 30.860, + "args": { + "External id": 510977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764183.462, "dur": 26.941, + "args": { + "External id": 510978,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274144764217.246, "dur": 17.648, + "args": { + "External id": 510979,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144764218.350, "dur": 2.034, + "args": { + "External id": 510980,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764221.045, "dur": 13.588, + "args": { + "External id": 510981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764224.031, "dur": 10.152, + "args": { + "External id": 510982,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274144764237.838, "dur": 13.856, + "args": { + "External id": 510983,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144764238.710, "dur": 2.548, + "args": { + "External id": 510984,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764241.721, "dur": 9.732, + "args": { + "External id": 510985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764242.119, "dur": 9.038, + "args": { + "External id": 510986,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144764262.511, "dur": 0.683, + "args": { + "External id": 510987,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 19023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338711, "tid": 2338711, + "ts": 6274144764270.579, "dur": 11.259, + "args": { + "External id": 510988,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764278.108, "dur": 2.020, + "args": { + "External id": 510989,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764288.735, "dur": 7.333, + "args": { + "External id": 510990,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764293.437, "dur": 0.705, + "args": { + "External id": 510991,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764297.304, "dur": 2.963, + "args": { + "External id": 510992,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764299.004, "dur": 0.423, + "args": { + "External id": 510993,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764301.485, "dur": 2.743, + "args": { + "External id": 510994,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764303.221, "dur": 0.461, + "args": { + "External id": 510995,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764307.045, "dur": 5.396, + "args": { + "External id": 510996,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764308.910, "dur": 2.982, + "args": { + "External id": 510997,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764313.666, "dur": 2.891, + "args": { + "External id": 510998,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764315.404, "dur": 0.394, + "args": { + "External id": 510999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764317.602, "dur": 2.771, + "args": { + "External id": 511000,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 19036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764319.477, "dur": 0.370, + "args": { + "External id": 511001,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144764324.677, "dur": 5.172, + "args": { + "External id": 511002,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 19038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764328.748, "dur": 0.294, + "args": { + "External id": 511003,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764333.651, "dur": 2.871, + "args": { + "External id": 511004,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764335.468, "dur": 0.473, + "args": { + "External id": 511005,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274144764339.459, "dur": 8.081, + "args": { + "External id": 511006,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764345.874, "dur": 0.574, + "args": { + "External id": 511007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764348.557, "dur": 2.491, + "args": { + "External id": 511008,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764350.047, "dur": 0.425, + "args": { + "External id": 511009,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764353.645, "dur": 6.246, + "args": { + "External id": 511010,"Sequence number": 5516434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19046 + } + }, + { + "ph": "s", "id": 253, "pid": 2338711, "tid": 2338711, "ts": 6274144764353.645, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764357.472, "dur": 0.643, + "args": { + "External id": 511011,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764362.882, "dur": 6.233, + "args": { + "External id": 511012,"Sequence number": 5516435, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19048 + } + }, + { + "ph": "s", "id": 252, "pid": 2338711, "tid": 2338711, "ts": 6274144764362.882, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764366.121, "dur": 2.208, + "args": { + "External id": 511013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274144764370.094, "dur": 5.704, + "args": { + "External id": 511014,"Sequence number": 5516436, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19050 + } + }, + { + "ph": "s", "id": 251, "pid": 2338711, "tid": 2338711, "ts": 6274144764370.094, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764374.112, "dur": 0.463, + "args": { + "External id": 511015,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144764376.934, "dur": 5.346, + "args": { + "External id": 511016,"Sequence number": 5516437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19052 + } + }, + { + "ph": "s", "id": 250, "pid": 2338711, "tid": 2338711, "ts": 6274144764376.934, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764381.209, "dur": 0.309, + "args": { + "External id": 511017,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274144764385.734, "dur": 36.192, + "args": { + "External id": 511018,"Sequence number": 5516438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274144764389.204, "dur": 32.520, + "args": { + "External id": 511019,"Sequence number": 5516438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144764392.011, "dur": 5.648, + "args": { + "External id": 511020,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144764393.689, "dur": 3.409, + "args": { + "External id": 511021,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764399.195, "dur": 22.063, + "args": { + "External id": 511022,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144764446.387, "dur": 3.939, + "args": { + "External id": 511023,"Sequence number": 5516438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19059 + } + }, + { + "ph": "s", "id": 249, "pid": 2338711, "tid": 2338711, "ts": 6274144764446.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144764452.775, "dur": 1.055, + "args": { + "External id": 511024,"Sequence number": 5516439, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144764482.465, "dur": 140380.817, + "args": { + "External id": 511025,"Sequence number": 5516439, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19061 + } + }, + { + "ph": "s", "id": 248, "pid": 2338711, "tid": 2338711, "ts": 6274144764482.465, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274144764495.285, "dur": 26.424, + "args": { + "External id": 511026,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274144764495.968, "dur": 25.508, + "args": { + "External id": 511027,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144764497.212, "dur": 6.660, + "args": { + "External id": 511028,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144764500.165, "dur": 3.375, + "args": { + "External id": 511029,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764504.730, "dur": 15.997, + "args": { + "External id": 511030,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144764538.789, "dur": 22.521, + "args": { + "External id": 511031,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144764539.875, "dur": 6.000, + "args": { + "External id": 511032,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764542.411, "dur": 3.173, + "args": { + "External id": 511033,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764546.758, "dur": 14.359, + "args": { + "External id": 511034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764547.410, "dur": 13.364, + "args": { + "External id": 511035,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144764564.642, "dur": 20.962, + "args": { + "External id": 511036,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144764565.622, "dur": 8.185, + "args": { + "External id": 511037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764569.138, "dur": 4.439, + "args": { + "External id": 511038,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764574.231, "dur": 11.185, + "args": { + "External id": 511039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764574.825, "dur": 10.248, + "args": { + "External id": 511040,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274144764607.923, "dur": 19.038, + "args": { + "External id": 511041,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144764609.158, "dur": 4.271, + "args": { + "External id": 511042,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764614.270, "dur": 12.470, + "args": { + "External id": 511043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764614.969, "dur": 11.294, + "args": { + "External id": 511044,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274144764635.758, "dur": 28.742, + "args": { + "External id": 511045,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144764668.547, "dur": 59.909, + "args": { + "External id": 511046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144764672.234, "dur": 55.725, + "args": { + "External id": 511047,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764679.067, "dur": 1.031, + "args": { + "External id": 511048,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144764683.838, "dur": 25.473, + "args": { + "External id": 511049,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144764685.576, "dur": 23.520, + "args": { + "External id": 511050,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144764688.412, "dur": 3.034, + "args": { + "External id": 511051,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144764692.210, "dur": 16.477, + "args": { + "External id": 511052,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274144764736.272, "dur": 134225.803, + "args": { + "External id": 511053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274144764738.117, "dur": 134221.981, + "args": { + "External id": 511054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144898982.205, "dur": 10.712, + "args": { + "External id": 511055,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144898988.761, "dur": 1.695, + "args": { + "External id": 511056,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144898999.038, "dur": 146.697, + "args": { + "External id": 511057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144899000.723, "dur": 7.905, + "args": { + "External id": 511058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144899003.716, "dur": 3.975, + "args": { + "External id": 511059,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899006.386, "dur": 1.006, + "args": { + "External id": 511060,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144899012.629, "dur": 132.122, + "args": { + "External id": 511061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144899014.849, "dur": 128.370, + "args": { + "External id": 511062,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144899152.220, "dur": 6.195, + "args": { + "External id": 511063,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899155.380, "dur": 0.895, + "args": { + "External id": 511064,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144899169.522, "dur": 4.564, + "args": { + "External id": 511065,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144899187.531, "dur": 13.262, + "args": { + "External id": 511066,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144899192.660, "dur": 7.757, + "args": { + "External id": 511067,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144899350.828, "dur": 251.149, + "args": { + "External id": 511068,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144899355.384, "dur": 2.313, + "args": { + "External id": 511069,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144899360.673, "dur": 240.342, + "args": { + "External id": 511070,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144899363.167, "dur": 0.925, + "args": { + "External id": 511071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144899366.542, "dur": 32.420, + "args": { + "External id": 511072,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144899401.156, "dur": 4.068, + "args": { + "External id": 511073,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899404.061, "dur": 0.672, + "args": { + "External id": 511074,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144899407.217, "dur": 31.959, + "args": { + "External id": 511075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144899410.592, "dur": 1.487, + "args": { + "External id": 511076,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144899414.601, "dur": 24.290, + "args": { + "External id": 511077,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899421.170, "dur": 3.296, + "args": { + "External id": 511078,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144899441.706, "dur": 26.499, + "args": { + "External id": 511079,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899471.706, "dur": 17.131, + "args": { + "External id": 511080,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144899492.663, "dur": 17.313, + "args": { + "External id": 511081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899512.817, "dur": 16.111, + "args": { + "External id": 511082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144899531.442, "dur": 23.657, + "args": { + "External id": 511083,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899534.408, "dur": 1.716, + "args": { + "External id": 511084,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899540.490, "dur": 0.622, + "args": { + "External id": 511085,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899557.454, "dur": 13.785, + "args": { + "External id": 511086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899573.621, "dur": 11.407, + "args": { + "External id": 511087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144899612.150, "dur": 2.919, + "args": { + "External id": 511088,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144899623.888, "dur": 4.626, + "args": { + "External id": 511089,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899626.832, "dur": 0.606, + "args": { + "External id": 511090,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144899715.868, "dur": 76.979, + "args": { + "External id": 511091,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144899801.482, "dur": 8.241, + "args": { + "External id": 511092,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899804.591, "dur": 3.761, + "args": { + "External id": 511093,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899811.654, "dur": 30.256, + "args": { + "External id": 511094,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144899847.897, "dur": 6.872, + "args": { + "External id": 511095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144899850.138, "dur": 3.882, + "args": { + "External id": 511096,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899852.892, "dur": 0.892, + "args": { + "External id": 511097,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144899860.722, "dur": 48.222, + "args": { + "External id": 511098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144899862.310, "dur": 45.945, + "args": { + "External id": 511099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144899914.837, "dur": 17.080, + "args": { + "External id": 511100,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144899938.818, "dur": 4.081, + "args": { + "External id": 511101,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899941.320, "dur": 0.630, + "args": { + "External id": 511102,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144899947.408, "dur": 50.147, + "args": { + "External id": 511103,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144899948.433, "dur": 7.329, + "args": { + "External id": 511104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144899951.513, "dur": 3.575, + "args": { + "External id": 511105,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144899952.886, "dur": 1.969, + "args": { + "External id": 511106,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144899956.374, "dur": 40.808, + "args": { + "External id": 511107,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144899957.279, "dur": 39.298, + "args": { + "External id": 511108,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144900002.390, "dur": 3.860, + "args": { + "External id": 511109,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900004.678, "dur": 0.324, + "args": { + "External id": 511110,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144900012.442, "dur": 1.849, + "args": { + "External id": 511111,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900025.360, "dur": 6.809, + "args": { + "External id": 511112,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900027.477, "dur": 4.335, + "args": { + "External id": 511113,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144900172.630, "dur": 193.951, + "args": { + "External id": 511114,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900175.785, "dur": 3.172, + "args": { + "External id": 511115,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144900181.244, "dur": 184.804, + "args": { + "External id": 511116,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144900183.160, "dur": 0.632, + "args": { + "External id": 511117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144900189.360, "dur": 22.804, + "args": { + "External id": 511118,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144900213.969, "dur": 6.925, + "args": { + "External id": 511119,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900217.096, "dur": 3.461, + "args": { + "External id": 511120,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144900222.131, "dur": 23.269, + "args": { + "External id": 511121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900223.424, "dur": 2.592, + "args": { + "External id": 511122,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144900227.618, "dur": 17.537, + "args": { + "External id": 511123,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900230.989, "dur": 2.164, + "args": { + "External id": 511124,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144900246.995, "dur": 20.941, + "args": { + "External id": 511125,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900270.110, "dur": 13.994, + "args": { + "External id": 511126,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144900287.423, "dur": 12.600, + "args": { + "External id": 511127,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900301.846, "dur": 12.291, + "args": { + "External id": 511128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144900318.583, "dur": 20.982, + "args": { + "External id": 511129,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900321.089, "dur": 1.905, + "args": { + "External id": 511130,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900325.649, "dur": 0.707, + "args": { + "External id": 511131,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900341.086, "dur": 11.483, + "args": { + "External id": 511132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900353.975, "dur": 10.941, + "args": { + "External id": 511133,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144900374.242, "dur": 2.514, + "args": { + "External id": 511134,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144900387.388, "dur": 4.061, + "args": { + "External id": 511135,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900390.249, "dur": 0.342, + "args": { + "External id": 511136,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144900464.679, "dur": 58.945, + "args": { + "External id": 511137,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144900528.937, "dur": 4.908, + "args": { + "External id": 511138,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900531.781, "dur": 0.940, + "args": { + "External id": 511139,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900535.346, "dur": 24.574, + "args": { + "External id": 511140,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144900564.709, "dur": 10.047, + "args": { + "External id": 511141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144900566.605, "dur": 7.418, + "args": { + "External id": 511142,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900571.633, "dur": 2.107, + "args": { + "External id": 511143,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144900577.682, "dur": 62.505, + "args": { + "External id": 511144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144900578.998, "dur": 60.195, + "args": { + "External id": 511145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900646.212, "dur": 16.378, + "args": { + "External id": 511146,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144900669.307, "dur": 9.080, + "args": { + "External id": 511147,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900676.534, "dur": 0.749, + "args": { + "External id": 511148,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144900683.128, "dur": 50.494, + "args": { + "External id": 511149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144900686.853, "dur": 4.095, + "args": { + "External id": 511150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144900688.194, "dur": 2.133, + "args": { + "External id": 511151,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900689.608, "dur": 0.574, + "args": { + "External id": 511152,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144900691.785, "dur": 41.351, + "args": { + "External id": 511153,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144900692.716, "dur": 39.772, + "args": { + "External id": 511154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144900737.265, "dur": 6.395, + "args": { + "External id": 511155,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900739.701, "dur": 2.693, + "args": { + "External id": 511156,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144900752.879, "dur": 2.072, + "args": { + "External id": 511157,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900763.769, "dur": 7.426, + "args": { + "External id": 511158,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900766.008, "dur": 4.893, + "args": { + "External id": 511159,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144900861.851, "dur": 234.420, + "args": { + "External id": 511160,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900864.045, "dur": 1.904, + "args": { + "External id": 511161,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144900867.922, "dur": 227.915, + "args": { + "External id": 511162,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144900871.578, "dur": 0.305, + "args": { + "External id": 511163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144900873.518, "dur": 21.793, + "args": { + "External id": 511164,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144900897.032, "dur": 3.061, + "args": { + "External id": 511165,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144900899.313, "dur": 0.545, + "args": { + "External id": 511166,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144900901.052, "dur": 22.373, + "args": { + "External id": 511167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144900902.441, "dur": 1.487, + "args": { + "External id": 511168,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144900905.582, "dur": 17.516, + "args": { + "External id": 511169,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900909.979, "dur": 2.277, + "args": { + "External id": 511170,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144900924.854, "dur": 25.389, + "args": { + "External id": 511171,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900952.250, "dur": 14.912, + "args": { + "External id": 511172,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144900971.797, "dur": 15.444, + "args": { + "External id": 511173,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144900988.812, "dur": 13.158, + "args": { + "External id": 511174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144901003.967, "dur": 24.617, + "args": { + "External id": 511175,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901006.182, "dur": 1.460, + "args": { + "External id": 511176,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901009.567, "dur": 0.727, + "args": { + "External id": 511177,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901029.900, "dur": 13.683, + "args": { + "External id": 511178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901047.284, "dur": 46.482, + "args": { + "External id": 511179,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144901105.321, "dur": 2.876, + "args": { + "External id": 511180,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144901118.408, "dur": 3.534, + "args": { + "External id": 511181,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901120.765, "dur": 0.395, + "args": { + "External id": 511182,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144901197.754, "dur": 82.826, + "args": { + "External id": 511183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144901285.698, "dur": 5.816, + "args": { + "External id": 511184,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901289.155, "dur": 1.137, + "args": { + "External id": 511185,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901293.315, "dur": 27.538, + "args": { + "External id": 511186,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144901328.383, "dur": 5.850, + "args": { + "External id": 511187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144901330.179, "dur": 3.385, + "args": { + "External id": 511188,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901332.261, "dur": 1.117, + "args": { + "External id": 511189,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144901337.320, "dur": 43.410, + "args": { + "External id": 511190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144901338.646, "dur": 41.596, + "args": { + "External id": 511191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901384.667, "dur": 14.130, + "args": { + "External id": 511192,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144901404.987, "dur": 6.134, + "args": { + "External id": 511193,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901409.739, "dur": 0.580, + "args": { + "External id": 511194,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144901415.680, "dur": 49.727, + "args": { + "External id": 511195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144901416.735, "dur": 5.281, + "args": { + "External id": 511196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144901417.547, "dur": 3.872, + "args": { + "External id": 511197,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901418.897, "dur": 2.371, + "args": { + "External id": 511198,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144901422.761, "dur": 42.245, + "args": { + "External id": 511199,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144901425.678, "dur": 38.749, + "args": { + "External id": 511200,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144901469.410, "dur": 3.884, + "args": { + "External id": 511201,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901471.631, "dur": 0.508, + "args": { + "External id": 511202,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144901478.407, "dur": 1.451, + "args": { + "External id": 511203,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144901488.427, "dur": 6.258, + "args": { + "External id": 511204,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144901490.530, "dur": 3.903, + "args": { + "External id": 511205,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144901580.940, "dur": 202.329, + "args": { + "External id": 511206,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144901583.284, "dur": 23.359, + "args": { + "External id": 511207,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144901609.288, "dur": 173.395, + "args": { + "External id": 511208,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144901610.775, "dur": 0.393, + "args": { + "External id": 511209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144901613.598, "dur": 23.015, + "args": { + "External id": 511210,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144901638.464, "dur": 3.243, + "args": { + "External id": 511211,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901640.806, "dur": 0.664, + "args": { + "External id": 511212,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144901642.894, "dur": 24.435, + "args": { + "External id": 511213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144901644.000, "dur": 2.762, + "args": { + "External id": 511214,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144901650.448, "dur": 16.551, + "args": { + "External id": 511215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901653.390, "dur": 2.185, + "args": { + "External id": 511216,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144901668.661, "dur": 19.811, + "args": { + "External id": 511217,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901690.483, "dur": 14.114, + "args": { + "External id": 511218,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144901707.576, "dur": 11.739, + "args": { + "External id": 511219,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901720.842, "dur": 12.226, + "args": { + "External id": 511220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144901734.930, "dur": 19.243, + "args": { + "External id": 511221,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901737.123, "dur": 1.855, + "args": { + "External id": 511222,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901740.967, "dur": 0.536, + "args": { + "External id": 511223,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901757.606, "dur": 11.829, + "args": { + "External id": 511224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901770.690, "dur": 10.987, + "args": { + "External id": 511225,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144901791.481, "dur": 2.573, + "args": { + "External id": 511226,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144901803.508, "dur": 3.477, + "args": { + "External id": 511227,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901805.759, "dur": 0.473, + "args": { + "External id": 511228,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144901874.819, "dur": 54.748, + "args": { + "External id": 511229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144901934.744, "dur": 4.724, + "args": { + "External id": 511230,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901937.715, "dur": 0.724, + "args": { + "External id": 511231,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144901943.425, "dur": 22.948, + "args": { + "External id": 511232,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144901970.727, "dur": 6.421, + "args": { + "External id": 511233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144901972.245, "dur": 4.246, + "args": { + "External id": 511234,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144901974.118, "dur": 2.161, + "args": { + "External id": 511235,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144901980.135, "dur": 42.130, + "args": { + "External id": 511236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144901981.573, "dur": 40.154, + "args": { + "External id": 511237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902025.925, "dur": 13.422, + "args": { + "External id": 511238,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144902047.164, "dur": 3.981, + "args": { + "External id": 511239,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902049.508, "dur": 0.754, + "args": { + "External id": 511240,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144902090.091, "dur": 57.313, + "args": { + "External id": 511241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144902091.210, "dur": 4.434, + "args": { + "External id": 511242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144902092.427, "dur": 2.457, + "args": { + "External id": 511243,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902093.738, "dur": 0.899, + "args": { + "External id": 511244,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144902099.212, "dur": 47.761, + "args": { + "External id": 511245,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144902100.090, "dur": 46.345, + "args": { + "External id": 511246,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144902152.742, "dur": 4.746, + "args": { + "External id": 511247,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902155.608, "dur": 0.522, + "args": { + "External id": 511248,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144902163.685, "dur": 1.753, + "args": { + "External id": 511249,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902173.936, "dur": 9.096, + "args": { + "External id": 511250,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902178.274, "dur": 4.511, + "args": { + "External id": 511251,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144902268.946, "dur": 185.068, + "args": { + "External id": 511252,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902271.451, "dur": 1.799, + "args": { + "External id": 511253,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144902276.827, "dur": 176.673, + "args": { + "External id": 511254,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144902278.100, "dur": 0.531, + "args": { + "External id": 511255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144902279.809, "dur": 20.360, + "args": { + "External id": 511256,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144902301.682, "dur": 4.805, + "args": { + "External id": 511257,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902303.994, "dur": 2.298, + "args": { + "External id": 511258,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144902307.480, "dur": 24.114, + "args": { + "External id": 511259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902311.059, "dur": 1.290, + "args": { + "External id": 511260,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144902313.722, "dur": 17.651, + "args": { + "External id": 511261,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902316.928, "dur": 2.714, + "args": { + "External id": 511262,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144902333.321, "dur": 21.710, + "args": { + "External id": 511263,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902356.583, "dur": 12.650, + "args": { + "External id": 511264,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144902371.828, "dur": 15.578, + "args": { + "External id": 511265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902388.955, "dur": 12.853, + "args": { + "External id": 511266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144902403.785, "dur": 23.700, + "args": { + "External id": 511267,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902406.154, "dur": 1.366, + "args": { + "External id": 511268,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902411.495, "dur": 2.146, + "args": { + "External id": 511269,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902428.937, "dur": 11.366, + "args": { + "External id": 511270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902441.273, "dur": 11.274, + "args": { + "External id": 511271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144902461.145, "dur": 2.057, + "args": { + "External id": 511272,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144902471.441, "dur": 3.699, + "args": { + "External id": 511273,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902473.916, "dur": 0.443, + "args": { + "External id": 511274,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144902537.478, "dur": 68.582, + "args": { + "External id": 511275,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144902623.458, "dur": 15.785, + "args": { + "External id": 511276,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902636.165, "dur": 1.376, + "args": { + "External id": 511277,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902640.768, "dur": 27.861, + "args": { + "External id": 511278,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144902673.759, "dur": 6.807, + "args": { + "External id": 511279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144902675.456, "dur": 4.391, + "args": { + "External id": 511280,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902677.451, "dur": 2.065, + "args": { + "External id": 511281,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144902685.971, "dur": 45.847, + "args": { + "External id": 511282,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144902687.185, "dur": 44.068, + "args": { + "External id": 511283,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902735.569, "dur": 14.878, + "args": { + "External id": 511284,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144902756.467, "dur": 3.969, + "args": { + "External id": 511285,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902759.163, "dur": 0.394, + "args": { + "External id": 511286,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144902764.558, "dur": 48.007, + "args": { + "External id": 511287,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144902765.326, "dur": 5.487, + "args": { + "External id": 511288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144902768.201, "dur": 2.051, + "args": { + "External id": 511289,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902769.609, "dur": 0.527, + "args": { + "External id": 511290,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144902771.287, "dur": 40.899, + "args": { + "External id": 511291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144902774.147, "dur": 37.398, + "args": { + "External id": 511292,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144902816.694, "dur": 4.248, + "args": { + "External id": 511293,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902818.934, "dur": 0.824, + "args": { + "External id": 511294,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144902827.191, "dur": 1.578, + "args": { + "External id": 511295,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902839.462, "dur": 9.487, + "args": { + "External id": 511296,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902841.549, "dur": 7.087, + "args": { + "External id": 511297,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144902943.432, "dur": 219.531, + "args": { + "External id": 511298,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902945.600, "dur": 2.043, + "args": { + "External id": 511299,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144902949.199, "dur": 213.177, + "args": { + "External id": 511300,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144902950.933, "dur": 0.339, + "args": { + "External id": 511301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144902954.343, "dur": 22.402, + "args": { + "External id": 511302,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144902978.275, "dur": 3.326, + "args": { + "External id": 511303,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144902980.387, "dur": 0.788, + "args": { + "External id": 511304,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144902982.599, "dur": 21.937, + "args": { + "External id": 511305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144902983.846, "dur": 1.303, + "args": { + "External id": 511306,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144902986.237, "dur": 17.962, + "args": { + "External id": 511307,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144902990.572, "dur": 2.545, + "args": { + "External id": 511308,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144903005.746, "dur": 19.370, + "args": { + "External id": 511309,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903026.965, "dur": 13.102, + "args": { + "External id": 511310,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144903042.646, "dur": 47.411, + "args": { + "External id": 511311,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903092.821, "dur": 13.693, + "args": { + "External id": 511312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144903111.291, "dur": 21.917, + "args": { + "External id": 511313,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903113.925, "dur": 2.099, + "args": { + "External id": 511314,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903118.383, "dur": 0.810, + "args": { + "External id": 511315,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903134.917, "dur": 13.544, + "args": { + "External id": 511316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903149.385, "dur": 11.573, + "args": { + "External id": 511317,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144903171.145, "dur": 2.496, + "args": { + "External id": 511318,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144903183.928, "dur": 4.087, + "args": { + "External id": 511319,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903186.423, "dur": 0.838, + "args": { + "External id": 511320,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144903266.707, "dur": 57.152, + "args": { + "External id": 511321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144903328.895, "dur": 5.174, + "args": { + "External id": 511322,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903332.087, "dur": 1.015, + "args": { + "External id": 511323,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903335.494, "dur": 23.695, + "args": { + "External id": 511324,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144903363.752, "dur": 12.313, + "args": { + "External id": 511325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144903365.613, "dur": 9.755, + "args": { + "External id": 511326,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903374.247, "dur": 0.930, + "args": { + "External id": 511327,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144903379.062, "dur": 42.593, + "args": { + "External id": 511328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144903380.385, "dur": 40.578, + "args": { + "External id": 511329,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903425.190, "dur": 15.764, + "args": { + "External id": 511330,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144903446.436, "dur": 3.977, + "args": { + "External id": 511331,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903448.935, "dur": 0.611, + "args": { + "External id": 511332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144903454.256, "dur": 50.581, + "args": { + "External id": 511333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144903457.401, "dur": 5.215, + "args": { + "External id": 511334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144903458.410, "dur": 3.653, + "args": { + "External id": 511335,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903459.963, "dur": 1.967, + "args": { + "External id": 511336,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144903463.401, "dur": 40.976, + "args": { + "External id": 511337,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144903464.299, "dur": 39.487, + "args": { + "External id": 511338,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144903508.673, "dur": 4.094, + "args": { + "External id": 511339,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903510.907, "dur": 0.747, + "args": { + "External id": 511340,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144903520.613, "dur": 1.375, + "args": { + "External id": 511341,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144903530.045, "dur": 5.681, + "args": { + "External id": 511342,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144903531.875, "dur": 3.559, + "args": { + "External id": 511343,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144903641.368, "dur": 182.011, + "args": { + "External id": 511344,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144903644.035, "dur": 3.257, + "args": { + "External id": 511345,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144903649.280, "dur": 173.582, + "args": { + "External id": 511346,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144903652.798, "dur": 0.681, + "args": { + "External id": 511347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144903656.282, "dur": 21.426, + "args": { + "External id": 511348,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144903679.382, "dur": 3.582, + "args": { + "External id": 511349,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903681.769, "dur": 0.881, + "args": { + "External id": 511350,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144903684.009, "dur": 22.588, + "args": { + "External id": 511351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144903685.356, "dur": 2.330, + "args": { + "External id": 511352,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144903688.928, "dur": 17.221, + "args": { + "External id": 511353,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903692.132, "dur": 2.568, + "args": { + "External id": 511354,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144903707.932, "dur": 22.856, + "args": { + "External id": 511355,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903732.550, "dur": 12.518, + "args": { + "External id": 511356,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144903750.399, "dur": 12.518, + "args": { + "External id": 511357,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903764.346, "dur": 11.294, + "args": { + "External id": 511358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144903777.413, "dur": 18.859, + "args": { + "External id": 511359,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903779.514, "dur": 1.705, + "args": { + "External id": 511360,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903783.131, "dur": 0.876, + "args": { + "External id": 511361,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903797.688, "dur": 10.822, + "args": { + "External id": 511362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903811.621, "dur": 10.127, + "args": { + "External id": 511363,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144903830.669, "dur": 2.188, + "args": { + "External id": 511364,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144903842.233, "dur": 3.693, + "args": { + "External id": 511365,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903844.766, "dur": 0.453, + "args": { + "External id": 511366,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144903909.538, "dur": 54.012, + "args": { + "External id": 511367,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144903968.339, "dur": 4.541, + "args": { + "External id": 511368,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144903971.254, "dur": 0.650, + "args": { + "External id": 511369,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144903974.764, "dur": 21.933, + "args": { + "External id": 511370,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144904003.148, "dur": 6.547, + "args": { + "External id": 511371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144904005.011, "dur": 4.009, + "args": { + "External id": 511372,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904006.733, "dur": 2.104, + "args": { + "External id": 511373,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144904012.130, "dur": 75.743, + "args": { + "External id": 511374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144904013.792, "dur": 72.897, + "args": { + "External id": 511375,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904094.082, "dur": 16.640, + "args": { + "External id": 511376,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904117.360, "dur": 7.417, + "args": { + "External id": 511377,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904123.008, "dur": 0.676, + "args": { + "External id": 511378,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144904128.962, "dur": 52.140, + "args": { + "External id": 511379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144904130.013, "dur": 4.417, + "args": { + "External id": 511380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144904131.224, "dur": 2.600, + "args": { + "External id": 511381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904132.952, "dur": 0.763, + "args": { + "External id": 511382,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144904135.294, "dur": 45.308, + "args": { + "External id": 511383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144904138.132, "dur": 41.925, + "args": { + "External id": 511384,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904185.270, "dur": 4.330, + "args": { + "External id": 511385,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904187.780, "dur": 0.599, + "args": { + "External id": 511386,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144904195.727, "dur": 1.607, + "args": { + "External id": 511387,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144904205.722, "dur": 6.265, + "args": { + "External id": 511388,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144904207.858, "dur": 3.825, + "args": { + "External id": 511389,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144904298.887, "dur": 178.342, + "args": { + "External id": 511390,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144904300.814, "dur": 3.785, + "args": { + "External id": 511391,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144904307.933, "dur": 168.660, + "args": { + "External id": 511392,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144904309.369, "dur": 0.508, + "args": { + "External id": 511393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144904311.459, "dur": 19.680, + "args": { + "External id": 511394,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144904332.653, "dur": 7.441, + "args": { + "External id": 511395,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904337.941, "dur": 1.898, + "args": { + "External id": 511396,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144904341.073, "dur": 22.684, + "args": { + "External id": 511397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144904344.849, "dur": 1.291, + "args": { + "External id": 511398,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144904347.257, "dur": 16.151, + "args": { + "External id": 511399,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904350.350, "dur": 1.970, + "args": { + "External id": 511400,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144904364.950, "dur": 19.474, + "args": { + "External id": 511401,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904386.137, "dur": 13.327, + "args": { + "External id": 511402,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144904401.877, "dur": 11.575, + "args": { + "External id": 511403,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904414.664, "dur": 11.764, + "args": { + "External id": 511404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144904428.026, "dur": 22.545, + "args": { + "External id": 511405,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904430.403, "dur": 1.661, + "args": { + "External id": 511406,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904436.721, "dur": 1.792, + "args": { + "External id": 511407,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904452.111, "dur": 11.220, + "args": { + "External id": 511408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904464.390, "dur": 11.181, + "args": { + "External id": 511409,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144904483.891, "dur": 1.794, + "args": { + "External id": 511410,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904493.574, "dur": 3.743, + "args": { + "External id": 511411,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904496.027, "dur": 0.585, + "args": { + "External id": 511412,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144904558.621, "dur": 68.652, + "args": { + "External id": 511413,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904633.938, "dur": 8.588, + "args": { + "External id": 511414,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904640.199, "dur": 0.886, + "args": { + "External id": 511415,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904644.348, "dur": 23.907, + "args": { + "External id": 511416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144904673.285, "dur": 5.560, + "args": { + "External id": 511417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144904674.649, "dur": 3.559, + "args": { + "External id": 511418,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904676.839, "dur": 1.184, + "args": { + "External id": 511419,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144904681.711, "dur": 42.618, + "args": { + "External id": 511420,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144904682.568, "dur": 41.152, + "args": { + "External id": 511421,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904730.413, "dur": 14.914, + "args": { + "External id": 511422,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144904750.334, "dur": 23.886, + "args": { + "External id": 511423,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144904752.528, "dur": 21.301, + "args": { + "External id": 511424,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904757.525, "dur": 1.119, + "args": { + "External id": 511425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144904779.940, "dur": 34.707, + "args": { + "External id": 511426,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144904781.754, "dur": 32.636, + "args": { + "External id": 511427,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904786.698, "dur": 3.868, + "args": { + "External id": 511428,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904791.776, "dur": 22.113, + "args": { + "External id": 511429,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144904830.773, "dur": 5.683, + "args": { + "External id": 511430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144904832.921, "dur": 3.232, + "args": { + "External id": 511431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144904837.933, "dur": 1.096, + "args": { + "External id": 511432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144904838.398, "dur": 0.550, + "args": { + "External id": 511433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904881.225, "dur": 23.330, + "args": { + "External id": 511434,"Sequence number": 5516440, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19470 + } + }, + { + "ph": "s", "id": 247, "pid": 2338711, "tid": 2338711, "ts": 6274144904881.225, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904910.682, "dur": 6.178, + "args": { + "External id": 511435,"Sequence number": 5516441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904914.462, "dur": 1.083, + "args": { + "External id": 511436,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274144904921.736, "dur": 6.525, + "args": { + "External id": 511437,"Sequence number": 5516441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904926.463, "dur": 0.633, + "args": { + "External id": 511438,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904929.408, "dur": 2.588, + "args": { + "External id": 511439,"Sequence number": 5516441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904930.915, "dur": 0.439, + "args": { + "External id": 511440,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904936.230, "dur": 8.295, + "args": { + "External id": 511441,"Sequence number": 5516441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19477 + } + }, + { + "ph": "s", "id": 246, "pid": 2338711, "tid": 2338711, "ts": 6274144904936.230, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904942.082, "dur": 1.085, + "args": { + "External id": 511442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904945.440, "dur": 4.992, + "args": { + "External id": 511443,"Sequence number": 5516442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19479 + } + }, + { + "ph": "s", "id": 245, "pid": 2338711, "tid": 2338711, "ts": 6274144904945.440, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904949.208, "dur": 0.442, + "args": { + "External id": 511444,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274144904951.503, "dur": 7.800, + "args": { + "External id": 511445,"Sequence number": 5516443, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19481 + } + }, + { + "ph": "s", "id": 244, "pid": 2338711, "tid": 2338711, "ts": 6274144904951.503, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904957.532, "dur": 0.712, + "args": { + "External id": 511446,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144904960.390, "dur": 6.595, + "args": { + "External id": 511447,"Sequence number": 5516444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19483 + } + }, + { + "ph": "s", "id": 243, "pid": 2338711, "tid": 2338711, "ts": 6274144904960.390, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144904963.435, "dur": 2.853, + "args": { + "External id": 511448,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274144904970.511, "dur": 31.342, + "args": { + "External id": 511449,"Sequence number": 5516445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274144904972.345, "dur": 29.305, + "args": { + "External id": 511450,"Sequence number": 5516445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144904975.023, "dur": 6.676, + "args": { + "External id": 511451,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144904977.522, "dur": 3.656, + "args": { + "External id": 511452,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144904982.581, "dur": 18.628, + "args": { + "External id": 511453,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144905027.036, "dur": 5.675, + "args": { + "External id": 511454,"Sequence number": 5516445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19490 + } + }, + { + "ph": "s", "id": 242, "pid": 2338711, "tid": 2338711, "ts": 6274144905027.036, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144905037.727, "dur": 0.889, + "args": { + "External id": 511455,"Sequence number": 5516446, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144905111.597, "dur": 48215.281, + "args": { + "External id": 511456,"Sequence number": 5516446, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19492 + } + }, + { + "ph": "s", "id": 241, "pid": 2338711, "tid": 2338711, "ts": 6274144905111.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274144905129.058, "dur": 35.058, + "args": { + "External id": 511457,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274144905129.938, "dur": 34.004, + "args": { + "External id": 511458,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144905131.798, "dur": 7.060, + "args": { + "External id": 511459,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144905133.439, "dur": 4.731, + "args": { + "External id": 511460,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905139.464, "dur": 24.109, + "args": { + "External id": 511461,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144905181.256, "dur": 26.749, + "args": { + "External id": 511462,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144905182.186, "dur": 5.671, + "args": { + "External id": 511463,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144905184.183, "dur": 3.384, + "args": { + "External id": 511464,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905189.038, "dur": 18.733, + "args": { + "External id": 511465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905192.925, "dur": 14.287, + "args": { + "External id": 511466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144905211.603, "dur": 20.597, + "args": { + "External id": 511467,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144905212.705, "dur": 5.412, + "args": { + "External id": 511468,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144905214.521, "dur": 3.350, + "args": { + "External id": 511469,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905218.563, "dur": 13.444, + "args": { + "External id": 511470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905219.437, "dur": 12.279, + "args": { + "External id": 511471,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274144905238.891, "dur": 19.840, + "args": { + "External id": 511472,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144905241.579, "dur": 2.562, + "args": { + "External id": 511473,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905244.907, "dur": 13.567, + "args": { + "External id": 511474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905247.296, "dur": 10.651, + "args": { + "External id": 511475,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274144905264.314, "dur": 26.421, + "args": { + "External id": 511476,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144905293.488, "dur": 51.919, + "args": { + "External id": 511477,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144905295.769, "dur": 49.254, + "args": { + "External id": 511478,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144905299.790, "dur": 1.103, + "args": { + "External id": 511479,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144905302.758, "dur": 23.847, + "args": { + "External id": 511480,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144905304.435, "dur": 21.943, + "args": { + "External id": 511481,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144905308.365, "dur": 3.313, + "args": { + "External id": 511482,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144905312.587, "dur": 13.391, + "args": { + "External id": 511483,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274144905349.838, "dur": 42155.617, + "args": { + "External id": 511484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274144905351.578, "dur": 42152.529, + "args": { + "External id": 511485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144947519.518, "dur": 8.061, + "args": { + "External id": 511486,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144947524.191, "dur": 1.352, + "args": { + "External id": 511487,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144947532.855, "dur": 144.670, + "args": { + "External id": 511488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144947555.986, "dur": 6.920, + "args": { + "External id": 511489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144947558.321, "dur": 3.586, + "args": { + "External id": 511490,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144947560.887, "dur": 0.740, + "args": { + "External id": 511491,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144947566.889, "dur": 109.796, + "args": { + "External id": 511492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144947568.458, "dur": 107.024, + "args": { + "External id": 511493,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144947683.318, "dur": 5.687, + "args": { + "External id": 511494,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144947686.041, "dur": 0.746, + "args": { + "External id": 511495,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144947698.000, "dur": 2.709, + "args": { + "External id": 511496,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144947710.905, "dur": 10.134, + "args": { + "External id": 511497,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144947715.763, "dur": 4.948, + "args": { + "External id": 511498,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144947859.750, "dur": 246.515, + "args": { + "External id": 511499,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144947863.851, "dur": 2.259, + "args": { + "External id": 511500,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144947867.682, "dur": 237.948, + "args": { + "External id": 511501,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144947869.379, "dur": 0.473, + "args": { + "External id": 511502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144947871.495, "dur": 31.325, + "args": { + "External id": 511503,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144947904.522, "dur": 5.003, + "args": { + "External id": 511504,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144947908.509, "dur": 0.724, + "args": { + "External id": 511505,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144947910.630, "dur": 26.571, + "args": { + "External id": 511506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144947913.581, "dur": 1.117, + "args": { + "External id": 511507,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144947916.373, "dur": 20.408, + "args": { + "External id": 511508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144947920.160, "dur": 3.306, + "args": { + "External id": 511509,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144947938.775, "dur": 24.239, + "args": { + "External id": 511510,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144947965.327, "dur": 16.641, + "args": { + "External id": 511511,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144947985.303, "dur": 14.084, + "args": { + "External id": 511512,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948000.928, "dur": 14.864, + "args": { + "External id": 511513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144948017.703, "dur": 24.534, + "args": { + "External id": 511514,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948019.989, "dur": 1.827, + "args": { + "External id": 511515,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948026.247, "dur": 0.503, + "args": { + "External id": 511516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948044.059, "dur": 44.292, + "args": { + "External id": 511517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948090.932, "dur": 12.943, + "args": { + "External id": 511518,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144948115.275, "dur": 2.853, + "args": { + "External id": 511519,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144948126.033, "dur": 4.481, + "args": { + "External id": 511520,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948128.872, "dur": 0.583, + "args": { + "External id": 511521,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144948213.492, "dur": 73.914, + "args": { + "External id": 511522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144948296.457, "dur": 6.535, + "args": { + "External id": 511523,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948299.486, "dur": 0.693, + "args": { + "External id": 511524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948304.472, "dur": 26.780, + "args": { + "External id": 511525,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144948339.615, "dur": 7.607, + "args": { + "External id": 511526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144948341.416, "dur": 4.896, + "args": { + "External id": 511527,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948343.612, "dur": 2.445, + "args": { + "External id": 511528,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144948352.338, "dur": 53.339, + "args": { + "External id": 511529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144948353.683, "dur": 51.457, + "args": { + "External id": 511530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948410.497, "dur": 17.052, + "args": { + "External id": 511531,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144948433.613, "dur": 3.916, + "args": { + "External id": 511532,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948435.939, "dur": 0.722, + "args": { + "External id": 511533,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144948442.430, "dur": 48.807, + "args": { + "External id": 511534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144948443.539, "dur": 6.412, + "args": { + "External id": 511535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144948447.158, "dur": 2.146, + "args": { + "External id": 511536,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948448.626, "dur": 0.513, + "args": { + "External id": 511537,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144948450.815, "dur": 40.078, + "args": { + "External id": 511538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144948451.241, "dur": 39.025, + "args": { + "External id": 511539,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144948495.552, "dur": 3.673, + "args": { + "External id": 511540,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948497.768, "dur": 0.353, + "args": { + "External id": 511541,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144948504.797, "dur": 1.724, + "args": { + "External id": 511542,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144948516.897, "dur": 7.418, + "args": { + "External id": 511543,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144948519.255, "dur": 4.721, + "args": { + "External id": 511544,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144948644.228, "dur": 201.406, + "args": { + "External id": 511545,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144948647.066, "dur": 2.882, + "args": { + "External id": 511546,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144948653.731, "dur": 191.300, + "args": { + "External id": 511547,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144948655.221, "dur": 0.415, + "args": { + "External id": 511548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144948659.005, "dur": 22.643, + "args": { + "External id": 511549,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144948683.190, "dur": 4.836, + "args": { + "External id": 511550,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948685.570, "dur": 2.163, + "args": { + "External id": 511551,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144948689.015, "dur": 24.111, + "args": { + "External id": 511552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144948690.200, "dur": 3.307, + "args": { + "External id": 511553,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144948695.192, "dur": 17.700, + "args": { + "External id": 511554,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948698.331, "dur": 2.204, + "args": { + "External id": 511555,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144948714.561, "dur": 20.877, + "args": { + "External id": 511556,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948737.379, "dur": 14.488, + "args": { + "External id": 511557,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144948755.057, "dur": 15.280, + "args": { + "External id": 511558,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948771.977, "dur": 14.211, + "args": { + "External id": 511559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144948790.097, "dur": 25.274, + "args": { + "External id": 511560,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948792.865, "dur": 2.376, + "args": { + "External id": 511561,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948797.351, "dur": 2.302, + "args": { + "External id": 511562,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948816.888, "dur": 13.509, + "args": { + "External id": 511563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144948831.461, "dur": 12.457, + "args": { + "External id": 511564,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144948853.237, "dur": 2.196, + "args": { + "External id": 511565,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144948865.474, "dur": 4.085, + "args": { + "External id": 511566,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144948868.183, "dur": 0.588, + "args": { + "External id": 511567,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144948943.854, "dur": 58.050, + "args": { + "External id": 511568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144949007.453, "dur": 4.764, + "args": { + "External id": 511569,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949010.393, "dur": 0.740, + "args": { + "External id": 511570,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949013.702, "dur": 25.631, + "args": { + "External id": 511571,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144949044.167, "dur": 8.330, + "args": { + "External id": 511572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144949045.684, "dur": 6.163, + "args": { + "External id": 511573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949049.911, "dur": 1.700, + "args": { + "External id": 511574,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144949089.089, "dur": 52.474, + "args": { + "External id": 511575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144949090.617, "dur": 50.195, + "args": { + "External id": 511576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949146.091, "dur": 18.402, + "args": { + "External id": 511577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144949170.661, "dur": 5.262, + "args": { + "External id": 511578,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949173.796, "dur": 0.971, + "args": { + "External id": 511579,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144949180.372, "dur": 48.424, + "args": { + "External id": 511580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144949183.677, "dur": 3.828, + "args": { + "External id": 511581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144949184.672, "dur": 2.205, + "args": { + "External id": 511582,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949186.242, "dur": 0.495, + "args": { + "External id": 511583,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144949188.250, "dur": 40.067, + "args": { + "External id": 511584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144949189.069, "dur": 38.757, + "args": { + "External id": 511585,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144949233.139, "dur": 4.155, + "args": { + "External id": 511586,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949235.619, "dur": 0.550, + "args": { + "External id": 511587,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144949245.961, "dur": 1.939, + "args": { + "External id": 511588,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144949256.624, "dur": 8.886, + "args": { + "External id": 511589,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144949258.938, "dur": 6.282, + "args": { + "External id": 511590,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144949357.279, "dur": 188.321, + "args": { + "External id": 511591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144949360.811, "dur": 1.963, + "args": { + "External id": 511592,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144949364.736, "dur": 180.294, + "args": { + "External id": 511593,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144949368.200, "dur": 0.614, + "args": { + "External id": 511594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144949369.831, "dur": 19.834, + "args": { + "External id": 511595,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144949391.202, "dur": 4.323, + "args": { + "External id": 511596,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949394.159, "dur": 1.064, + "args": { + "External id": 511597,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144949396.725, "dur": 19.833, + "args": { + "External id": 511598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144949397.942, "dur": 0.981, + "args": { + "External id": 511599,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144949400.113, "dur": 16.201, + "args": { + "External id": 511600,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949402.495, "dur": 2.849, + "args": { + "External id": 511601,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144949417.881, "dur": 22.302, + "args": { + "External id": 511602,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949441.608, "dur": 13.704, + "args": { + "External id": 511603,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144949460.692, "dur": 15.074, + "args": { + "External id": 511604,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949477.141, "dur": 14.807, + "args": { + "External id": 511605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144949493.959, "dur": 20.031, + "args": { + "External id": 511606,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949497.204, "dur": 1.309, + "args": { + "External id": 511607,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949500.535, "dur": 0.574, + "args": { + "External id": 511608,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949515.611, "dur": 14.045, + "args": { + "External id": 511609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949533.056, "dur": 11.037, + "args": { + "External id": 511610,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144949552.091, "dur": 1.670, + "args": { + "External id": 511611,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144949562.973, "dur": 3.501, + "args": { + "External id": 511612,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949565.129, "dur": 0.640, + "args": { + "External id": 511613,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144949655.094, "dur": 62.736, + "args": { + "External id": 511614,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144949723.598, "dur": 5.891, + "args": { + "External id": 511615,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949726.998, "dur": 0.989, + "args": { + "External id": 511616,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949730.789, "dur": 27.239, + "args": { + "External id": 511617,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144949765.354, "dur": 5.640, + "args": { + "External id": 511618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144949766.664, "dur": 3.630, + "args": { + "External id": 511619,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949769.062, "dur": 1.026, + "args": { + "External id": 511620,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144949773.368, "dur": 69.364, + "args": { + "External id": 511621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144949774.632, "dur": 67.509, + "args": { + "External id": 511622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144949846.865, "dur": 19.113, + "args": { + "External id": 511623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144949871.942, "dur": 5.972, + "args": { + "External id": 511624,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949876.462, "dur": 0.594, + "args": { + "External id": 511625,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144949882.231, "dur": 48.874, + "args": { + "External id": 511626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144949883.075, "dur": 3.954, + "args": { + "External id": 511627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144949883.897, "dur": 2.551, + "args": { + "External id": 511628,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949885.518, "dur": 0.789, + "args": { + "External id": 511629,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144949887.742, "dur": 43.033, + "args": { + "External id": 511630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144949890.535, "dur": 39.521, + "args": { + "External id": 511631,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144949935.236, "dur": 3.855, + "args": { + "External id": 511632,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144949937.549, "dur": 0.375, + "args": { + "External id": 511633,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144949945.162, "dur": 1.649, + "args": { + "External id": 511634,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144949954.495, "dur": 8.225, + "args": { + "External id": 511635,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144949956.659, "dur": 5.736, + "args": { + "External id": 511636,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144950050.133, "dur": 216.932, + "args": { + "External id": 511637,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144950085.883, "dur": 3.741, + "args": { + "External id": 511638,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144950091.530, "dur": 174.835, + "args": { + "External id": 511639,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144950093.378, "dur": 0.347, + "args": { + "External id": 511640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144950095.075, "dur": 22.993, + "args": { + "External id": 511641,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144950119.684, "dur": 5.345, + "args": { + "External id": 511642,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950123.524, "dur": 1.085, + "args": { + "External id": 511643,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144950126.030, "dur": 23.433, + "args": { + "External id": 511644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144950127.132, "dur": 1.155, + "args": { + "External id": 511645,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144950132.117, "dur": 17.091, + "args": { + "External id": 511646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950135.519, "dur": 1.852, + "args": { + "External id": 511647,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144950150.757, "dur": 21.647, + "args": { + "External id": 511648,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950173.631, "dur": 13.241, + "args": { + "External id": 511649,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144950189.737, "dur": 12.297, + "args": { + "External id": 511650,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950205.633, "dur": 11.569, + "args": { + "External id": 511651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144950218.859, "dur": 19.313, + "args": { + "External id": 511652,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950221.292, "dur": 2.016, + "args": { + "External id": 511653,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950225.112, "dur": 0.673, + "args": { + "External id": 511654,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950241.822, "dur": 11.343, + "args": { + "External id": 511655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950254.449, "dur": 10.985, + "args": { + "External id": 511656,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144950275.365, "dur": 2.208, + "args": { + "External id": 511657,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144950287.114, "dur": 3.971, + "args": { + "External id": 511658,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950289.633, "dur": 0.652, + "args": { + "External id": 511659,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144950362.635, "dur": 55.257, + "args": { + "External id": 511660,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144950422.979, "dur": 4.714, + "args": { + "External id": 511661,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950425.990, "dur": 0.742, + "args": { + "External id": 511662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950431.394, "dur": 21.909, + "args": { + "External id": 511663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144950457.898, "dur": 6.332, + "args": { + "External id": 511664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144950459.470, "dur": 4.135, + "args": { + "External id": 511665,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950461.253, "dur": 2.134, + "args": { + "External id": 511666,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144950466.791, "dur": 42.074, + "args": { + "External id": 511667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144950468.014, "dur": 40.221, + "args": { + "External id": 511668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950512.549, "dur": 15.665, + "args": { + "External id": 511669,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144950536.202, "dur": 3.533, + "args": { + "External id": 511670,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950538.509, "dur": 0.385, + "args": { + "External id": 511671,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144950543.842, "dur": 61.916, + "args": { + "External id": 511672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144950544.874, "dur": 3.504, + "args": { + "External id": 511673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144950545.923, "dur": 1.900, + "args": { + "External id": 511674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950547.203, "dur": 0.494, + "args": { + "External id": 511675,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144950551.677, "dur": 53.422, + "args": { + "External id": 511676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144950552.349, "dur": 51.888, + "args": { + "External id": 511677,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144950612.557, "dur": 4.994, + "args": { + "External id": 511678,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950615.396, "dur": 0.516, + "args": { + "External id": 511679,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144950623.744, "dur": 1.424, + "args": { + "External id": 511680,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144950633.101, "dur": 10.726, + "args": { + "External id": 511681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144950637.171, "dur": 6.397, + "args": { + "External id": 511682,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144950731.370, "dur": 183.259, + "args": { + "External id": 511683,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144950733.493, "dur": 1.936, + "args": { + "External id": 511684,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144950739.068, "dur": 175.022, + "args": { + "External id": 511685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144950740.645, "dur": 0.565, + "args": { + "External id": 511686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144950742.326, "dur": 21.053, + "args": { + "External id": 511687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144950765.202, "dur": 4.677, + "args": { + "External id": 511688,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950767.283, "dur": 2.337, + "args": { + "External id": 511689,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144950770.860, "dur": 22.855, + "args": { + "External id": 511690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144950774.723, "dur": 1.008, + "args": { + "External id": 511691,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144950776.968, "dur": 16.435, + "args": { + "External id": 511692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950779.586, "dur": 2.776, + "args": { + "External id": 511693,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144950795.349, "dur": 19.801, + "args": { + "External id": 511694,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950816.626, "dur": 12.797, + "args": { + "External id": 511695,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144950832.194, "dur": 15.589, + "args": { + "External id": 511696,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950849.414, "dur": 13.475, + "args": { + "External id": 511697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144950864.463, "dur": 23.629, + "args": { + "External id": 511698,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950866.792, "dur": 1.484, + "args": { + "External id": 511699,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950872.577, "dur": 2.244, + "args": { + "External id": 511700,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950889.394, "dur": 11.458, + "args": { + "External id": 511701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144950901.915, "dur": 11.307, + "args": { + "External id": 511702,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144950921.015, "dur": 1.587, + "args": { + "External id": 511703,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144950931.211, "dur": 3.468, + "args": { + "External id": 511704,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144950933.610, "dur": 0.277, + "args": { + "External id": 511705,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144950998.133, "dur": 93.633, + "args": { + "External id": 511706,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144951101.573, "dur": 7.056, + "args": { + "External id": 511707,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951105.762, "dur": 1.252, + "args": { + "External id": 511708,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951110.284, "dur": 30.540, + "args": { + "External id": 511709,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144951146.554, "dur": 6.419, + "args": { + "External id": 511710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144951148.433, "dur": 3.756, + "args": { + "External id": 511711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951150.634, "dur": 1.323, + "args": { + "External id": 511712,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144951158.232, "dur": 48.167, + "args": { + "External id": 511713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144951159.543, "dur": 46.096, + "args": { + "External id": 511714,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951210.451, "dur": 17.548, + "args": { + "External id": 511715,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144951234.315, "dur": 4.118, + "args": { + "External id": 511716,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951237.162, "dur": 0.364, + "args": { + "External id": 511717,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144951242.646, "dur": 50.947, + "args": { + "External id": 511718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144951243.500, "dur": 8.715, + "args": { + "External id": 511719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144951246.401, "dur": 5.197, + "args": { + "External id": 511720,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951248.100, "dur": 3.339, + "args": { + "External id": 511721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144951253.353, "dur": 39.847, + "args": { + "External id": 511722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144951253.967, "dur": 38.642, + "args": { + "External id": 511723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144951297.695, "dur": 4.060, + "args": { + "External id": 511724,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951300.158, "dur": 0.484, + "args": { + "External id": 511725,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144951307.902, "dur": 1.538, + "args": { + "External id": 511726,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144951320.591, "dur": 7.208, + "args": { + "External id": 511727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144951323.078, "dur": 4.460, + "args": { + "External id": 511728,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144951419.991, "dur": 210.705, + "args": { + "External id": 511729,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144951424.552, "dur": 2.112, + "args": { + "External id": 511730,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144951428.358, "dur": 201.778, + "args": { + "External id": 511731,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144951429.883, "dur": 0.350, + "args": { + "External id": 511732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144951433.663, "dur": 24.246, + "args": { + "External id": 511733,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144951459.669, "dur": 4.659, + "args": { + "External id": 511734,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951463.228, "dur": 0.920, + "args": { + "External id": 511735,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144951465.449, "dur": 20.479, + "args": { + "External id": 511736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144951466.892, "dur": 1.210, + "args": { + "External id": 511737,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144951469.839, "dur": 15.808, + "args": { + "External id": 511738,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951472.894, "dur": 2.070, + "args": { + "External id": 511739,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144951487.188, "dur": 22.382, + "args": { + "External id": 511740,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951511.133, "dur": 13.701, + "args": { + "External id": 511741,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144951527.595, "dur": 12.680, + "args": { + "External id": 511742,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951541.702, "dur": 14.507, + "args": { + "External id": 511743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144951560.115, "dur": 22.400, + "args": { + "External id": 511744,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951563.995, "dur": 1.683, + "args": { + "External id": 511745,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951567.514, "dur": 0.681, + "args": { + "External id": 511746,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951584.067, "dur": 31.638, + "args": { + "External id": 511747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951618.041, "dur": 10.577, + "args": { + "External id": 511748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144951638.489, "dur": 2.597, + "args": { + "External id": 511749,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144951651.148, "dur": 4.146, + "args": { + "External id": 511750,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951653.989, "dur": 0.585, + "args": { + "External id": 511751,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144951725.171, "dur": 56.240, + "args": { + "External id": 511752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144951786.277, "dur": 4.202, + "args": { + "External id": 511753,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951788.900, "dur": 0.560, + "args": { + "External id": 511754,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951792.064, "dur": 23.112, + "args": { + "External id": 511755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144951819.747, "dur": 8.584, + "args": { + "External id": 511756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144951821.497, "dur": 6.063, + "args": { + "External id": 511757,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951826.077, "dur": 1.222, + "args": { + "External id": 511758,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144951831.130, "dur": 41.560, + "args": { + "External id": 511759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144951832.430, "dur": 39.554, + "args": { + "External id": 511760,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144951876.347, "dur": 13.402, + "args": { + "External id": 511761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144951895.951, "dur": 3.244, + "args": { + "External id": 511762,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951897.823, "dur": 0.531, + "args": { + "External id": 511763,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144951903.037, "dur": 46.148, + "args": { + "External id": 511764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144951906.210, "dur": 4.073, + "args": { + "External id": 511765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144951907.198, "dur": 2.487, + "args": { + "External id": 511766,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951908.736, "dur": 0.804, + "args": { + "External id": 511767,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144951910.840, "dur": 37.923, + "args": { + "External id": 511768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144951911.525, "dur": 36.674, + "args": { + "External id": 511769,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144951953.340, "dur": 6.612, + "args": { + "External id": 511770,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144951958.173, "dur": 0.699, + "args": { + "External id": 511771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144951967.567, "dur": 1.487, + "args": { + "External id": 511772,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144951976.983, "dur": 9.623, + "args": { + "External id": 511773,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144951979.259, "dur": 7.024, + "args": { + "External id": 511774,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144952104.145, "dur": 183.459, + "args": { + "External id": 511775,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144952106.753, "dur": 3.039, + "args": { + "External id": 511776,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144952111.463, "dur": 175.690, + "args": { + "External id": 511777,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144952115.400, "dur": 0.425, + "args": { + "External id": 511778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144952117.156, "dur": 24.834, + "args": { + "External id": 511779,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144952143.720, "dur": 3.745, + "args": { + "External id": 511780,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952146.117, "dur": 1.066, + "args": { + "External id": 511781,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144952148.439, "dur": 22.935, + "args": { + "External id": 511782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144952149.422, "dur": 1.281, + "args": { + "External id": 511783,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144952152.146, "dur": 18.969, + "args": { + "External id": 511784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952157.145, "dur": 2.685, + "args": { + "External id": 511785,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144952172.920, "dur": 20.292, + "args": { + "External id": 511786,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952194.678, "dur": 12.566, + "args": { + "External id": 511787,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144952212.327, "dur": 13.612, + "args": { + "External id": 511788,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952227.301, "dur": 11.854, + "args": { + "External id": 511789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144952241.141, "dur": 18.731, + "args": { + "External id": 511790,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952242.932, "dur": 1.263, + "args": { + "External id": 511791,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952246.445, "dur": 0.658, + "args": { + "External id": 511792,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952261.314, "dur": 10.552, + "args": { + "External id": 511793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952275.425, "dur": 10.625, + "args": { + "External id": 511794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144952294.472, "dur": 2.327, + "args": { + "External id": 511795,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144952306.027, "dur": 3.772, + "args": { + "External id": 511796,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952308.581, "dur": 0.395, + "args": { + "External id": 511797,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144952373.882, "dur": 56.810, + "args": { + "External id": 511798,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144952435.721, "dur": 4.127, + "args": { + "External id": 511799,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952438.061, "dur": 0.901, + "args": { + "External id": 511800,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952441.252, "dur": 24.419, + "args": { + "External id": 511801,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144952472.284, "dur": 5.528, + "args": { + "External id": 511802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144952474.010, "dur": 2.964, + "args": { + "External id": 511803,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952475.888, "dur": 0.917, + "args": { + "External id": 511804,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144952480.146, "dur": 40.752, + "args": { + "External id": 511805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144952481.342, "dur": 38.754, + "args": { + "External id": 511806,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952524.569, "dur": 15.836, + "args": { + "External id": 511807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144952545.770, "dur": 6.146, + "args": { + "External id": 511808,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952550.428, "dur": 0.609, + "args": { + "External id": 511809,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144952555.978, "dur": 66.259, + "args": { + "External id": 511810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144952557.021, "dur": 5.273, + "args": { + "External id": 511811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144952558.037, "dur": 3.632, + "args": { + "External id": 511812,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952559.503, "dur": 1.989, + "args": { + "External id": 511813,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144952562.883, "dur": 58.918, + "args": { + "External id": 511814,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144952565.899, "dur": 54.920, + "args": { + "External id": 511815,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144952628.673, "dur": 4.761, + "args": { + "External id": 511816,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952631.355, "dur": 0.749, + "args": { + "External id": 511817,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144952640.336, "dur": 1.608, + "args": { + "External id": 511818,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144952650.075, "dur": 9.527, + "args": { + "External id": 511819,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144952652.828, "dur": 6.489, + "args": { + "External id": 511820,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144952743.008, "dur": 174.552, + "args": { + "External id": 511821,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144952745.799, "dur": 1.721, + "args": { + "External id": 511822,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144952748.643, "dur": 168.466, + "args": { + "External id": 511823,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144952750.077, "dur": 0.403, + "args": { + "External id": 511824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144952753.379, "dur": 20.913, + "args": { + "External id": 511825,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144952775.970, "dur": 3.095, + "args": { + "External id": 511826,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952778.066, "dur": 0.788, + "args": { + "External id": 511827,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144952779.994, "dur": 24.750, + "args": { + "External id": 511828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144952781.212, "dur": 2.959, + "args": { + "External id": 511829,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144952788.038, "dur": 16.427, + "args": { + "External id": 511830,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952791.319, "dur": 2.217, + "args": { + "External id": 511831,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144952806.504, "dur": 19.735, + "args": { + "External id": 511832,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952827.505, "dur": 12.849, + "args": { + "External id": 511833,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144952842.739, "dur": 12.407, + "args": { + "External id": 511834,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952856.637, "dur": 11.614, + "args": { + "External id": 511835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144952869.766, "dur": 18.553, + "args": { + "External id": 511836,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952871.553, "dur": 1.674, + "args": { + "External id": 511837,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952875.376, "dur": 0.757, + "args": { + "External id": 511838,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952891.759, "dur": 12.008, + "args": { + "External id": 511839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144952904.874, "dur": 11.257, + "args": { + "External id": 511840,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144952923.840, "dur": 1.864, + "args": { + "External id": 511841,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144952933.777, "dur": 3.624, + "args": { + "External id": 511842,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144952935.928, "dur": 0.679, + "args": { + "External id": 511843,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144952998.942, "dur": 50.883, + "args": { + "External id": 511844,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144953054.409, "dur": 38.924, + "args": { + "External id": 511845,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953090.344, "dur": 1.098, + "args": { + "External id": 511846,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953096.868, "dur": 33.160, + "args": { + "External id": 511847,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144953135.895, "dur": 5.821, + "args": { + "External id": 511848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144953137.525, "dur": 3.290, + "args": { + "External id": 511849,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953139.638, "dur": 1.016, + "args": { + "External id": 511850,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144953144.915, "dur": 47.434, + "args": { + "External id": 511851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144953146.075, "dur": 45.465, + "args": { + "External id": 511852,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953196.453, "dur": 15.011, + "args": { + "External id": 511853,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144953218.541, "dur": 26.654, + "args": { + "External id": 511854,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144953221.173, "dur": 23.611, + "args": { + "External id": 511855,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953227.909, "dur": 0.654, + "args": { + "External id": 511856,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144953251.042, "dur": 28.501, + "args": { + "External id": 511857,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144953253.177, "dur": 26.100, + "args": { + "External id": 511858,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953257.659, "dur": 4.303, + "args": { + "External id": 511859,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953263.430, "dur": 15.285, + "args": { + "External id": 511860,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144953294.000, "dur": 7.309, + "args": { + "External id": 511861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144953298.224, "dur": 2.761, + "args": { + "External id": 511862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144953302.497, "dur": 1.658, + "args": { + "External id": 511863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274144953303.113, "dur": 0.776, + "args": { + "External id": 511864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953345.171, "dur": 22.741, + "args": { + "External id": 511865,"Sequence number": 5516447, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953369.693, "dur": 13.452, + "args": { + "External id": 511866,"Sequence number": 5516448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19902 + } + }, + { + "ph": "s", "id": 240, "pid": 2338711, "tid": 2338711, "ts": 6274144953369.693, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144953389.153, "dur": 5.980, + "args": { + "External id": 511867,"Sequence number": 5516449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953392.706, "dur": 1.011, + "args": { + "External id": 511868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274144953397.618, "dur": 8.029, + "args": { + "External id": 511869,"Sequence number": 5516449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953403.926, "dur": 0.554, + "args": { + "External id": 511870,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144953406.790, "dur": 2.807, + "args": { + "External id": 511871,"Sequence number": 5516449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953408.742, "dur": 0.242, + "args": { + "External id": 511872,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144953413.675, "dur": 5.583, + "args": { + "External id": 511873,"Sequence number": 5516449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19909 + } + }, + { + "ph": "s", "id": 239, "pid": 2338711, "tid": 2338711, "ts": 6274144953413.675, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953417.426, "dur": 0.665, + "args": { + "External id": 511874,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144953422.498, "dur": 4.849, + "args": { + "External id": 511875,"Sequence number": 5516450, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19911 + } + }, + { + "ph": "s", "id": 238, "pid": 2338711, "tid": 2338711, "ts": 6274144953422.498, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953425.820, "dur": 0.733, + "args": { + "External id": 511876,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274144953428.267, "dur": 5.291, + "args": { + "External id": 511877,"Sequence number": 5516451, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19913 + } + }, + { + "ph": "s", "id": 237, "pid": 2338711, "tid": 2338711, "ts": 6274144953428.267, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953432.099, "dur": 0.608, + "args": { + "External id": 511878,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144953434.484, "dur": 6.669, + "args": { + "External id": 511879,"Sequence number": 5516452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19915 + } + }, + { + "ph": "s", "id": 236, "pid": 2338711, "tid": 2338711, "ts": 6274144953434.484, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953437.621, "dur": 2.798, + "args": { + "External id": 511880,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274144953444.823, "dur": 30.790, + "args": { + "External id": 511881,"Sequence number": 5516453, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274144953448.589, "dur": 26.824, + "args": { + "External id": 511882,"Sequence number": 5516453, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144953451.160, "dur": 6.159, + "args": { + "External id": 511883,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144953453.528, "dur": 3.254, + "args": { + "External id": 511884,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953458.113, "dur": 16.807, + "args": { + "External id": 511885,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144953500.818, "dur": 4.010, + "args": { + "External id": 511886,"Sequence number": 5516453, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19922 + } + }, + { + "ph": "s", "id": 235, "pid": 2338711, "tid": 2338711, "ts": 6274144953500.818, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144953507.354, "dur": 1.050, + "args": { + "External id": 511887,"Sequence number": 5516454, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274144953543.113, "dur": 48384.129, + "args": { + "External id": 511888,"Sequence number": 5516454, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19924 + } + }, + { + "ph": "s", "id": 234, "pid": 2338711, "tid": 2338711, "ts": 6274144953543.113, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274144953557.476, "dur": 49.242, + "args": { + "External id": 511889,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274144953562.720, "dur": 43.756, + "args": { + "External id": 511890,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144953566.112, "dur": 5.164, + "args": { + "External id": 511891,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144953567.887, "dur": 2.951, + "args": { + "External id": 511892,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953571.982, "dur": 33.814, + "args": { + "External id": 511893,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144953626.758, "dur": 30.334, + "args": { + "External id": 511894,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144953628.042, "dur": 7.039, + "args": { + "External id": 511895,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953630.738, "dur": 3.978, + "args": { + "External id": 511896,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953636.477, "dur": 20.401, + "args": { + "External id": 511897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953640.628, "dur": 15.781, + "args": { + "External id": 511898,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144953660.983, "dur": 21.804, + "args": { + "External id": 511899,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274144953661.852, "dur": 4.626, + "args": { + "External id": 511900,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953663.348, "dur": 2.911, + "args": { + "External id": 511901,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953667.141, "dur": 15.452, + "args": { + "External id": 511902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953667.897, "dur": 14.340, + "args": { + "External id": 511903,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274144953689.370, "dur": 21.609, + "args": { + "External id": 511904,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144953691.336, "dur": 3.200, + "args": { + "External id": 511905,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953695.109, "dur": 15.584, + "args": { + "External id": 511906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953697.999, "dur": 12.317, + "args": { + "External id": 511907,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274144953715.835, "dur": 24.147, + "args": { + "External id": 511908,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144953742.875, "dur": 48.178, + "args": { + "External id": 511909,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144953744.997, "dur": 45.666, + "args": { + "External id": 511910,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953749.346, "dur": 0.897, + "args": { + "External id": 511911,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274144953751.918, "dur": 22.745, + "args": { + "External id": 511912,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274144953753.916, "dur": 20.474, + "args": { + "External id": 511913,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144953756.426, "dur": 2.640, + "args": { + "External id": 511914,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144953759.940, "dur": 14.013, + "args": { + "External id": 511915,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274144953797.467, "dur": 42283.819, + "args": { + "External id": 511916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274144953798.985, "dur": 42281.042, + "args": { + "External id": 511917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144996094.992, "dur": 8.488, + "args": { + "External id": 511918,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996100.499, "dur": 1.173, + "args": { + "External id": 511919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144996109.004, "dur": 110.053, + "args": { + "External id": 511920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144996110.535, "dur": 6.167, + "args": { + "External id": 511921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144996112.552, "dur": 3.248, + "args": { + "External id": 511922,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996114.889, "dur": 0.639, + "args": { + "External id": 511923,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144996120.402, "dur": 97.919, + "args": { + "External id": 511924,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144996122.327, "dur": 95.300, + "args": { + "External id": 511925,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144996222.392, "dur": 4.351, + "args": { + "External id": 511926,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996224.898, "dur": 0.584, + "args": { + "External id": 511927,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144996234.363, "dur": 2.715, + "args": { + "External id": 511928,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144996247.568, "dur": 9.340, + "args": { + "External id": 511929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144996252.408, "dur": 4.212, + "args": { + "External id": 511930,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144996394.729, "dur": 226.256, + "args": { + "External id": 511931,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144996398.169, "dur": 2.067, + "args": { + "External id": 511932,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144996402.033, "dur": 218.394, + "args": { + "External id": 511933,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144996403.943, "dur": 0.494, + "args": { + "External id": 511934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144996405.935, "dur": 29.471, + "args": { + "External id": 511935,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144996437.236, "dur": 3.485, + "args": { + "External id": 511936,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996439.708, "dur": 0.699, + "args": { + "External id": 511937,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144996442.049, "dur": 31.587, + "args": { + "External id": 511938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144996449.978, "dur": 2.604, + "args": { + "External id": 511939,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144996454.057, "dur": 19.267, + "args": { + "External id": 511940,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996458.042, "dur": 3.209, + "args": { + "External id": 511941,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144996475.214, "dur": 22.335, + "args": { + "External id": 511942,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996499.869, "dur": 14.896, + "args": { + "External id": 511943,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144996517.828, "dur": 14.679, + "args": { + "External id": 511944,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996534.144, "dur": 14.612, + "args": { + "External id": 511945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144996550.838, "dur": 23.080, + "args": { + "External id": 511946,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996553.236, "dur": 1.433, + "args": { + "External id": 511947,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996559.112, "dur": 0.553, + "args": { + "External id": 511948,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996575.496, "dur": 27.858, + "args": { + "External id": 511949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996605.860, "dur": 13.048, + "args": { + "External id": 511950,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144996630.534, "dur": 2.633, + "args": { + "External id": 511951,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144996640.978, "dur": 4.004, + "args": { + "External id": 511952,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996643.409, "dur": 0.617, + "args": { + "External id": 511953,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144996728.046, "dur": 72.556, + "args": { + "External id": 511954,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144996808.230, "dur": 6.421, + "args": { + "External id": 511955,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996810.870, "dur": 0.775, + "args": { + "External id": 511956,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996816.104, "dur": 28.043, + "args": { + "External id": 511957,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144996849.955, "dur": 7.875, + "args": { + "External id": 511958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144996851.764, "dur": 5.079, + "args": { + "External id": 511959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996854.007, "dur": 2.511, + "args": { + "External id": 511960,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144996863.204, "dur": 46.139, + "args": { + "External id": 511961,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144996864.391, "dur": 44.423, + "args": { + "External id": 511962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144996914.151, "dur": 15.066, + "args": { + "External id": 511963,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144996935.406, "dur": 3.896, + "args": { + "External id": 511964,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996937.827, "dur": 0.545, + "args": { + "External id": 511965,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144996944.119, "dur": 47.367, + "args": { + "External id": 511966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144996945.202, "dur": 5.873, + "args": { + "External id": 511967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144996948.286, "dur": 2.163, + "args": { + "External id": 511968,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996949.694, "dur": 0.560, + "args": { + "External id": 511969,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144996951.812, "dur": 39.286, + "args": { + "External id": 511970,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144996952.544, "dur": 38.085, + "args": { + "External id": 511971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144996995.162, "dur": 3.614, + "args": { + "External id": 511972,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144996997.028, "dur": 0.575, + "args": { + "External id": 511973,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144997007.022, "dur": 1.511, + "args": { + "External id": 511974,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997017.912, "dur": 7.336, + "args": { + "External id": 511975,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997020.096, "dur": 4.832, + "args": { + "External id": 511976,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144997161.687, "dur": 196.781, + "args": { + "External id": 511977,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997164.575, "dur": 3.106, + "args": { + "External id": 511978,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144997171.739, "dur": 186.209, + "args": { + "External id": 511979,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144997176.499, "dur": 0.543, + "args": { + "External id": 511980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144997178.306, "dur": 21.866, + "args": { + "External id": 511981,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144997202.037, "dur": 6.343, + "args": { + "External id": 511982,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997204.407, "dur": 3.668, + "args": { + "External id": 511983,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144997209.883, "dur": 21.223, + "args": { + "External id": 511984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997211.366, "dur": 1.188, + "args": { + "External id": 511985,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144997214.024, "dur": 16.650, + "args": { + "External id": 511986,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997217.039, "dur": 2.301, + "args": { + "External id": 511987,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144997232.824, "dur": 20.797, + "args": { + "External id": 511988,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997255.560, "dur": 14.082, + "args": { + "External id": 511989,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144997274.682, "dur": 12.801, + "args": { + "External id": 511990,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997289.197, "dur": 13.535, + "args": { + "External id": 511991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144997304.846, "dur": 23.023, + "args": { + "External id": 511992,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997307.197, "dur": 1.418, + "args": { + "External id": 511993,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997310.830, "dur": 2.079, + "args": { + "External id": 511994,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997329.331, "dur": 13.475, + "args": { + "External id": 511995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997344.215, "dur": 12.609, + "args": { + "External id": 511996,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144997368.857, "dur": 2.151, + "args": { + "External id": 511997,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144997382.044, "dur": 3.893, + "args": { + "External id": 511998,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997384.617, "dur": 0.437, + "args": { + "External id": 511999,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144997458.117, "dur": 58.301, + "args": { + "External id": 512000,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144997545.046, "dur": 5.720, + "args": { + "External id": 512001,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997548.579, "dur": 0.689, + "args": { + "External id": 512002,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997552.349, "dur": 26.189, + "args": { + "External id": 512003,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144997585.690, "dur": 26.681, + "args": { + "External id": 512004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144997603.607, "dur": 7.512, + "args": { + "External id": 512005,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997607.199, "dur": 3.501, + "args": { + "External id": 512006,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144997616.219, "dur": 49.780, + "args": { + "External id": 512007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144997617.445, "dur": 47.928, + "args": { + "External id": 512008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997671.144, "dur": 18.085, + "args": { + "External id": 512009,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144997695.502, "dur": 6.350, + "args": { + "External id": 512010,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997700.344, "dur": 0.539, + "args": { + "External id": 512011,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144997706.295, "dur": 47.648, + "args": { + "External id": 512012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144997707.276, "dur": 3.163, + "args": { + "External id": 512013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144997707.952, "dur": 1.876, + "args": { + "External id": 512014,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997709.060, "dur": 0.612, + "args": { + "External id": 512015,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144997711.071, "dur": 42.468, + "args": { + "External id": 512016,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144997714.286, "dur": 38.619, + "args": { + "External id": 512017,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144997758.226, "dur": 5.933, + "args": { + "External id": 512018,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997760.535, "dur": 2.520, + "args": { + "External id": 512019,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144997770.859, "dur": 1.527, + "args": { + "External id": 512020,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997781.135, "dur": 6.358, + "args": { + "External id": 512021,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997783.014, "dur": 4.199, + "args": { + "External id": 512022,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144997889.573, "dur": 240.054, + "args": { + "External id": 512023,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997891.870, "dur": 1.817, + "args": { + "External id": 512024,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144997897.918, "dur": 231.174, + "args": { + "External id": 512025,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144997899.310, "dur": 0.368, + "args": { + "External id": 512026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144997900.700, "dur": 22.763, + "args": { + "External id": 512027,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144997925.083, "dur": 4.607, + "args": { + "External id": 512028,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144997926.907, "dur": 2.485, + "args": { + "External id": 512029,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144997930.681, "dur": 25.937, + "args": { + "External id": 512030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144997932.100, "dur": 1.190, + "args": { + "External id": 512031,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144997934.584, "dur": 21.767, + "args": { + "External id": 512032,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997939.851, "dur": 2.862, + "args": { + "External id": 512033,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144997958.004, "dur": 22.279, + "args": { + "External id": 512034,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144997981.949, "dur": 15.448, + "args": { + "External id": 512035,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144998000.434, "dur": 14.234, + "args": { + "External id": 512036,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998016.151, "dur": 14.633, + "args": { + "External id": 512037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144998032.463, "dur": 59.516, + "args": { + "External id": 512038,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998034.724, "dur": 1.475, + "args": { + "External id": 512039,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998038.122, "dur": 2.757, + "args": { + "External id": 512040,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998097.730, "dur": 17.406, + "args": { + "External id": 512041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998116.202, "dur": 11.823, + "args": { + "External id": 512042,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144998138.069, "dur": 2.531, + "args": { + "External id": 512043,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144998151.248, "dur": 4.087, + "args": { + "External id": 512044,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998153.809, "dur": 0.559, + "args": { + "External id": 512045,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144998230.088, "dur": 87.329, + "args": { + "External id": 512046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144998322.507, "dur": 7.948, + "args": { + "External id": 512047,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998328.542, "dur": 0.674, + "args": { + "External id": 512048,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998332.097, "dur": 31.567, + "args": { + "External id": 512049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144998368.596, "dur": 5.620, + "args": { + "External id": 512050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144998370.189, "dur": 3.293, + "args": { + "External id": 512051,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998371.986, "dur": 1.294, + "args": { + "External id": 512052,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144998376.951, "dur": 48.656, + "args": { + "External id": 512053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144998378.121, "dur": 46.939, + "args": { + "External id": 512054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998431.767, "dur": 14.364, + "args": { + "External id": 512055,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144998451.936, "dur": 3.868, + "args": { + "External id": 512056,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998454.511, "dur": 0.505, + "args": { + "External id": 512057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144998459.668, "dur": 47.172, + "args": { + "External id": 512058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144998460.763, "dur": 6.031, + "args": { + "External id": 512059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144998461.547, "dur": 4.667, + "args": { + "External id": 512060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998465.489, "dur": 0.569, + "args": { + "External id": 512061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144998467.269, "dur": 39.147, + "args": { + "External id": 512062,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144998468.011, "dur": 37.722, + "args": { + "External id": 512063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144998510.428, "dur": 3.908, + "args": { + "External id": 512064,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998512.629, "dur": 0.513, + "args": { + "External id": 512065,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144998519.488, "dur": 1.428, + "args": { + "External id": 512066,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144998532.105, "dur": 6.353, + "args": { + "External id": 512067,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144998533.942, "dur": 4.270, + "args": { + "External id": 512068,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144998639.564, "dur": 188.904, + "args": { + "External id": 512069,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144998643.972, "dur": 4.121, + "args": { + "External id": 512070,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144998650.154, "dur": 177.767, + "args": { + "External id": 512071,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144998651.536, "dur": 0.485, + "args": { + "External id": 512072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144998652.879, "dur": 21.937, + "args": { + "External id": 512073,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144998679.153, "dur": 4.864, + "args": { + "External id": 512074,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998682.909, "dur": 0.848, + "args": { + "External id": 512075,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144998684.916, "dur": 22.384, + "args": { + "External id": 512076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144998685.928, "dur": 1.178, + "args": { + "External id": 512077,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144998688.714, "dur": 18.309, + "args": { + "External id": 512078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998691.460, "dur": 2.267, + "args": { + "External id": 512079,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144998708.448, "dur": 22.634, + "args": { + "External id": 512080,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998732.825, "dur": 13.464, + "args": { + "External id": 512081,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144998749.007, "dur": 14.122, + "args": { + "External id": 512082,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998764.965, "dur": 11.609, + "args": { + "External id": 512083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144998778.132, "dur": 24.299, + "args": { + "External id": 512084,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998784.747, "dur": 1.737, + "args": { + "External id": 512085,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998788.767, "dur": 0.446, + "args": { + "External id": 512086,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998803.765, "dur": 11.025, + "args": { + "External id": 512087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998815.754, "dur": 11.300, + "args": { + "External id": 512088,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144998835.365, "dur": 1.854, + "args": { + "External id": 512089,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144998846.869, "dur": 3.555, + "args": { + "External id": 512090,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998849.251, "dur": 0.381, + "args": { + "External id": 512091,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144998917.118, "dur": 53.276, + "args": { + "External id": 512092,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144998975.206, "dur": 4.983, + "args": { + "External id": 512093,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144998978.211, "dur": 0.886, + "args": { + "External id": 512094,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144998981.841, "dur": 22.603, + "args": { + "External id": 512095,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144999008.637, "dur": 7.208, + "args": { + "External id": 512096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144999010.087, "dur": 4.809, + "args": { + "External id": 512097,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999013.836, "dur": 0.850, + "args": { + "External id": 512098,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144999018.389, "dur": 75.575, + "args": { + "External id": 512099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144999019.764, "dur": 73.101, + "args": { + "External id": 512100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999100.893, "dur": 15.686, + "args": { + "External id": 512101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144999122.601, "dur": 5.087, + "args": { + "External id": 512102,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999125.741, "dur": 0.755, + "args": { + "External id": 512103,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144999131.997, "dur": 53.286, + "args": { + "External id": 512104,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144999135.377, "dur": 4.030, + "args": { + "External id": 512105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144999136.383, "dur": 2.358, + "args": { + "External id": 512106,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999138.042, "dur": 0.550, + "args": { + "External id": 512107,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144999140.318, "dur": 44.596, + "args": { + "External id": 512108,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144999141.103, "dur": 43.305, + "args": { + "External id": 512109,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144999189.480, "dur": 3.914, + "args": { + "External id": 512110,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999191.834, "dur": 0.536, + "args": { + "External id": 512111,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144999199.569, "dur": 1.463, + "args": { + "External id": 512112,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144999211.028, "dur": 5.987, + "args": { + "External id": 512113,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144999213.042, "dur": 3.715, + "args": { + "External id": 512114,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144999306.616, "dur": 210.962, + "args": { + "External id": 512115,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144999311.234, "dur": 2.255, + "args": { + "External id": 512116,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274144999315.371, "dur": 201.606, + "args": { + "External id": 512117,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274144999316.629, "dur": 0.521, + "args": { + "External id": 512118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274144999320.150, "dur": 23.276, + "args": { + "External id": 512119,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274144999344.952, "dur": 5.324, + "args": { + "External id": 512120,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999349.370, "dur": 0.693, + "args": { + "External id": 512121,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144999351.117, "dur": 23.308, + "args": { + "External id": 512122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144999352.207, "dur": 1.194, + "args": { + "External id": 512123,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274144999354.563, "dur": 19.615, + "args": { + "External id": 512124,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999357.331, "dur": 2.805, + "args": { + "External id": 512125,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274144999375.817, "dur": 23.620, + "args": { + "External id": 512126,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999401.179, "dur": 14.985, + "args": { + "External id": 512127,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274144999426.873, "dur": 22.332, + "args": { + "External id": 512128,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999452.699, "dur": 13.160, + "args": { + "External id": 512129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274144999467.585, "dur": 21.316, + "args": { + "External id": 512130,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999469.798, "dur": 1.571, + "args": { + "External id": 512131,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999473.233, "dur": 0.721, + "args": { + "External id": 512132,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999490.256, "dur": 13.241, + "args": { + "External id": 512133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999504.449, "dur": 11.429, + "args": { + "External id": 512134,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144999526.958, "dur": 1.742, + "args": { + "External id": 512135,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144999537.834, "dur": 3.900, + "args": { + "External id": 512136,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999540.409, "dur": 0.415, + "args": { + "External id": 512137,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144999630.255, "dur": 64.671, + "args": { + "External id": 512138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144999700.655, "dur": 6.431, + "args": { + "External id": 512139,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999704.348, "dur": 0.845, + "args": { + "External id": 512140,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999708.753, "dur": 28.207, + "args": { + "External id": 512141,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144999742.029, "dur": 9.881, + "args": { + "External id": 512142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144999746.175, "dur": 4.938, + "args": { + "External id": 512143,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999748.097, "dur": 2.731, + "args": { + "External id": 512144,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144999754.834, "dur": 43.570, + "args": { + "External id": 512145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144999756.088, "dur": 41.746, + "args": { + "External id": 512146,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274144999802.425, "dur": 16.134, + "args": { + "External id": 512147,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144999824.100, "dur": 5.836, + "args": { + "External id": 512148,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999828.766, "dur": 0.418, + "args": { + "External id": 512149,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274144999837.196, "dur": 45.193, + "args": { + "External id": 512150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274144999838.276, "dur": 3.741, + "args": { + "External id": 512151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274144999839.206, "dur": 2.248, + "args": { + "External id": 512152,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999840.645, "dur": 0.671, + "args": { + "External id": 512153,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274144999842.548, "dur": 39.515, + "args": { + "External id": 512154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274144999843.024, "dur": 38.538, + "args": { + "External id": 512155,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274144999889.143, "dur": 3.502, + "args": { + "External id": 512156,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274144999890.990, "dur": 0.532, + "args": { + "External id": 512157,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274144999898.735, "dur": 1.601, + "args": { + "External id": 512158,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144999908.673, "dur": 10.149, + "args": { + "External id": 512159,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274144999911.136, "dur": 7.388, + "args": { + "External id": 512160,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145000008.838, "dur": 231.054, + "args": { + "External id": 512161,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145000011.242, "dur": 1.982, + "args": { + "External id": 512162,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145000014.412, "dur": 224.732, + "args": { + "External id": 512163,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145000015.704, "dur": 0.356, + "args": { + "External id": 512164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145000017.417, "dur": 23.657, + "args": { + "External id": 512165,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145000042.886, "dur": 3.280, + "args": { + "External id": 512166,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000045.011, "dur": 0.936, + "args": { + "External id": 512167,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145000047.209, "dur": 64.219, + "args": { + "External id": 512168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145000048.158, "dur": 1.434, + "args": { + "External id": 512169,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145000087.098, "dur": 24.008, + "args": { + "External id": 512170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000093.140, "dur": 2.820, + "args": { + "External id": 512171,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145000112.802, "dur": 23.849, + "args": { + "External id": 512172,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000138.378, "dur": 13.691, + "args": { + "External id": 512173,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145000154.692, "dur": 14.866, + "args": { + "External id": 512174,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000170.998, "dur": 13.328, + "args": { + "External id": 512175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145000186.150, "dur": 21.945, + "args": { + "External id": 512176,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000188.160, "dur": 1.549, + "args": { + "External id": 512177,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000192.354, "dur": 0.844, + "args": { + "External id": 512178,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000211.626, "dur": 12.519, + "args": { + "External id": 512179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000225.329, "dur": 12.413, + "args": { + "External id": 512180,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145000248.620, "dur": 2.258, + "args": { + "External id": 512181,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145000260.739, "dur": 4.066, + "args": { + "External id": 512182,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000263.273, "dur": 0.614, + "args": { + "External id": 512183,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145000334.311, "dur": 57.660, + "args": { + "External id": 512184,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145000397.018, "dur": 4.708, + "args": { + "External id": 512185,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000399.947, "dur": 0.733, + "args": { + "External id": 512186,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000405.168, "dur": 26.142, + "args": { + "External id": 512187,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145000436.021, "dur": 5.585, + "args": { + "External id": 512188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145000437.562, "dur": 3.241, + "args": { + "External id": 512189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000439.809, "dur": 0.736, + "args": { + "External id": 512190,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145000444.084, "dur": 40.443, + "args": { + "External id": 512191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145000445.389, "dur": 38.571, + "args": { + "External id": 512192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000488.687, "dur": 15.464, + "args": { + "External id": 512193,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145000511.724, "dur": 3.547, + "args": { + "External id": 512194,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000513.704, "dur": 0.725, + "args": { + "External id": 512195,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145000519.365, "dur": 50.813, + "args": { + "External id": 512196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145000520.533, "dur": 5.280, + "args": { + "External id": 512197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145000521.038, "dur": 4.185, + "args": { + "External id": 512198,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000522.535, "dur": 2.529, + "args": { + "External id": 512199,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145000529.071, "dur": 40.669, + "args": { + "External id": 512200,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145000529.588, "dur": 39.480, + "args": { + "External id": 512201,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145000574.383, "dur": 3.350, + "args": { + "External id": 512202,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000576.147, "dur": 0.501, + "args": { + "External id": 512203,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145000583.412, "dur": 1.572, + "args": { + "External id": 512204,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145000610.279, "dur": 7.319, + "args": { + "External id": 512205,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145000612.792, "dur": 4.326, + "args": { + "External id": 512206,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145000706.062, "dur": 185.637, + "args": { + "External id": 512207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145000708.439, "dur": 1.657, + "args": { + "External id": 512208,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145000711.267, "dur": 179.938, + "args": { + "External id": 512209,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145000712.673, "dur": 0.491, + "args": { + "External id": 512210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145000716.832, "dur": 22.584, + "args": { + "External id": 512211,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145000740.811, "dur": 4.167, + "args": { + "External id": 512212,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000743.626, "dur": 0.848, + "args": { + "External id": 512213,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145000745.773, "dur": 26.994, + "args": { + "External id": 512214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145000746.783, "dur": 3.169, + "args": { + "External id": 512215,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145000753.347, "dur": 19.029, + "args": { + "External id": 512216,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000756.491, "dur": 2.925, + "args": { + "External id": 512217,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145000773.954, "dur": 21.073, + "args": { + "External id": 512218,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000796.890, "dur": 14.963, + "args": { + "External id": 512219,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145000814.700, "dur": 13.502, + "args": { + "External id": 512220,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000829.462, "dur": 12.949, + "args": { + "External id": 512221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145000843.861, "dur": 20.695, + "args": { + "External id": 512222,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000845.724, "dur": 1.553, + "args": { + "External id": 512223,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000851.502, "dur": 0.496, + "args": { + "External id": 512224,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000865.917, "dur": 11.829, + "args": { + "External id": 512225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145000879.025, "dur": 11.153, + "args": { + "External id": 512226,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145000898.245, "dur": 2.014, + "args": { + "External id": 512227,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145000909.307, "dur": 3.816, + "args": { + "External id": 512228,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145000912.037, "dur": 0.310, + "args": { + "External id": 512229,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145000975.880, "dur": 54.538, + "args": { + "External id": 512230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145001035.634, "dur": 7.057, + "args": { + "External id": 512231,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001040.866, "dur": 0.803, + "args": { + "External id": 512232,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001043.884, "dur": 58.351, + "args": { + "External id": 512233,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145001109.460, "dur": 8.092, + "args": { + "External id": 512234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145001111.126, "dur": 5.434, + "args": { + "External id": 512235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001113.687, "dur": 2.625, + "args": { + "External id": 512236,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145001120.681, "dur": 47.212, + "args": { + "External id": 512237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145001124.340, "dur": 42.847, + "args": { + "External id": 512238,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001171.785, "dur": 14.799, + "args": { + "External id": 512239,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145001192.617, "dur": 3.809, + "args": { + "External id": 512240,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001195.048, "dur": 0.426, + "args": { + "External id": 512241,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145001200.518, "dur": 46.252, + "args": { + "External id": 512242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145001201.509, "dur": 5.786, + "args": { + "External id": 512243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145001202.232, "dur": 4.352, + "args": { + "External id": 512244,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001205.980, "dur": 0.462, + "args": { + "External id": 512245,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145001207.785, "dur": 38.548, + "args": { + "External id": 512246,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145001208.831, "dur": 36.870, + "args": { + "External id": 512247,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145001250.775, "dur": 3.855, + "args": { + "External id": 512248,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001252.796, "dur": 0.527, + "args": { + "External id": 512249,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145001263.571, "dur": 1.433, + "args": { + "External id": 512250,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145001274.992, "dur": 9.169, + "args": { + "External id": 512251,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145001277.313, "dur": 6.524, + "args": { + "External id": 512252,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145001369.357, "dur": 169.799, + "args": { + "External id": 512253,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145001371.361, "dur": 1.862, + "args": { + "External id": 512254,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145001374.506, "dur": 164.276, + "args": { + "External id": 512255,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145001375.808, "dur": 0.462, + "args": { + "External id": 512256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145001380.016, "dur": 22.108, + "args": { + "External id": 512257,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145001403.599, "dur": 3.814, + "args": { + "External id": 512258,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001406.339, "dur": 0.856, + "args": { + "External id": 512259,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145001408.148, "dur": 21.367, + "args": { + "External id": 512260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145001409.079, "dur": 1.120, + "args": { + "External id": 512261,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145001411.706, "dur": 17.550, + "args": { + "External id": 512262,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001416.042, "dur": 2.094, + "args": { + "External id": 512263,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145001430.801, "dur": 18.541, + "args": { + "External id": 512264,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001450.993, "dur": 11.929, + "args": { + "External id": 512265,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145001465.301, "dur": 12.302, + "args": { + "External id": 512266,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001479.152, "dur": 10.908, + "args": { + "External id": 512267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145001494.294, "dur": 18.787, + "args": { + "External id": 512268,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001496.370, "dur": 1.410, + "args": { + "External id": 512269,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001500.212, "dur": 0.587, + "args": { + "External id": 512270,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001514.352, "dur": 10.730, + "args": { + "External id": 512271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001526.106, "dur": 11.563, + "args": { + "External id": 512272,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145001545.496, "dur": 1.426, + "args": { + "External id": 512273,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145001554.760, "dur": 4.189, + "args": { + "External id": 512274,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001557.272, "dur": 0.811, + "args": { + "External id": 512275,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145001646.526, "dur": 58.232, + "args": { + "External id": 512276,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145001710.544, "dur": 6.260, + "args": { + "External id": 512277,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001713.891, "dur": 1.314, + "args": { + "External id": 512278,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001718.290, "dur": 27.176, + "args": { + "External id": 512279,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145001750.243, "dur": 7.076, + "args": { + "External id": 512280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145001751.545, "dur": 5.105, + "args": { + "External id": 512281,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001755.616, "dur": 0.870, + "args": { + "External id": 512282,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145001760.122, "dur": 40.922, + "args": { + "External id": 512283,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145001761.372, "dur": 39.159, + "args": { + "External id": 512284,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001805.085, "dur": 13.942, + "args": { + "External id": 512285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145001823.339, "dur": 24.788, + "args": { + "External id": 512286,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145001825.744, "dur": 21.956, + "args": { + "External id": 512287,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001832.481, "dur": 0.665, + "args": { + "External id": 512288,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274145001853.253, "dur": 27.529, + "args": { + "External id": 512289,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274145001855.087, "dur": 25.469, + "args": { + "External id": 512290,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001862.258, "dur": 3.313, + "args": { + "External id": 512291,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001866.636, "dur": 13.356, + "args": { + "External id": 512292,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145001893.313, "dur": 4.923, + "args": { + "External id": 512293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145001894.991, "dur": 2.948, + "args": { + "External id": 512294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145001899.722, "dur": 1.285, + "args": { + "External id": 512295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145001900.219, "dur": 0.510, + "args": { + "External id": 512296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001944.672, "dur": 21.696, + "args": { + "External id": 512297,"Sequence number": 5516455, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145001968.219, "dur": 12.549, + "args": { + "External id": 512298,"Sequence number": 5516456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20334 + } + }, + { + "ph": "s", "id": 233, "pid": 2338711, "tid": 2338711, "ts": 6274145001968.219, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145001986.462, "dur": 6.120, + "args": { + "External id": 512299,"Sequence number": 5516457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 20335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001990.518, "dur": 0.855, + "args": { + "External id": 512300,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274145001995.431, "dur": 5.977, + "args": { + "External id": 512301,"Sequence number": 5516457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 20337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145001999.623, "dur": 0.532, + "args": { + "External id": 512302,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145002005.037, "dur": 2.807, + "args": { + "External id": 512303,"Sequence number": 5516457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 20339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002006.975, "dur": 0.294, + "args": { + "External id": 512304,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 20340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145002012.308, "dur": 6.039, + "args": { + "External id": 512305,"Sequence number": 5516457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20341 + } + }, + { + "ph": "s", "id": 232, "pid": 2338711, "tid": 2338711, "ts": 6274145002012.308, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002016.159, "dur": 0.993, + "args": { + "External id": 512306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145002019.613, "dur": 6.417, + "args": { + "External id": 512307,"Sequence number": 5516458, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20343 + } + }, + { + "ph": "s", "id": 231, "pid": 2338711, "tid": 2338711, "ts": 6274145002019.613, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002024.946, "dur": 0.295, + "args": { + "External id": 512308,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338711, "tid": 2338711, + "ts": 6274145002027.097, "dur": 6.229, + "args": { + "External id": 512309,"Sequence number": 5516459, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 20345 + } + }, + { + "ph": "s", "id": 230, "pid": 2338711, "tid": 2338711, "ts": 6274145002027.097, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002030.505, "dur": 1.981, + "args": { + "External id": 512310,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145002034.386, "dur": 4.659, + "args": { + "External id": 512311,"Sequence number": 5516460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 20347 + } + }, + { + "ph": "s", "id": 229, "pid": 2338711, "tid": 2338711, "ts": 6274145002034.386, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002037.419, "dur": 0.851, + "args": { + "External id": 512312,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 20348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274145002043.124, "dur": 71.783, + "args": { + "External id": 512313,"Sequence number": 5516461, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274145002044.529, "dur": 70.155, + "args": { + "External id": 512314,"Sequence number": 5516461, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145002047.383, "dur": 43.997, + "args": { + "External id": 512315,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 20351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145002051.981, "dur": 3.512, + "args": { + "External id": 512316,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002093.345, "dur": 20.699, + "args": { + "External id": 512317,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 20353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145002144.206, "dur": 5.438, + "args": { + "External id": 512318,"Sequence number": 5516461, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 20354 + } + }, + { + "ph": "s", "id": 228, "pid": 2338711, "tid": 2338711, "ts": 6274145002144.206, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145002152.394, "dur": 1.363, + "args": { + "External id": 512319,"Sequence number": 5516462, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338711, "tid": 2338711, + "ts": 6274145002185.848, "dur": 48206.531, + "args": { + "External id": 512320,"Sequence number": 5516462, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 20356 + } + }, + { + "ph": "s", "id": 227, "pid": 2338711, "tid": 2338711, "ts": 6274145002185.848, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338711, "tid": 2338711, + "ts": 6274145002202.382, "dur": 33.584, + "args": { + "External id": 512321,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274145002205.793, "dur": 29.961, + "args": { + "External id": 512322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145002207.768, "dur": 7.009, + "args": { + "External id": 512323,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145002210.831, "dur": 3.544, + "args": { + "External id": 512324,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002215.964, "dur": 19.340, + "args": { + "External id": 512325,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 20361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145002252.651, "dur": 26.182, + "args": { + "External id": 512326,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145002253.720, "dur": 6.822, + "args": { + "External id": 512327,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002256.409, "dur": 3.762, + "args": { + "External id": 512328,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002262.348, "dur": 16.280, + "args": { + "External id": 512329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002264.086, "dur": 14.124, + "args": { + "External id": 512330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145002282.603, "dur": 22.070, + "args": { + "External id": 512331,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145002283.596, "dur": 6.860, + "args": { + "External id": 512332,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002287.084, "dur": 3.158, + "args": { + "External id": 512333,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002290.937, "dur": 13.558, + "args": { + "External id": 512334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002291.884, "dur": 12.298, + "args": { + "External id": 512335,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 20371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274145002311.602, "dur": 17.789, + "args": { + "External id": 512336,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145002313.395, "dur": 4.447, + "args": { + "External id": 512337,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002318.526, "dur": 10.617, + "args": { + "External id": 512338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 20374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002319.481, "dur": 9.386, + "args": { + "External id": 512339,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274145002334.051, "dur": 22.121, + "args": { + "External id": 512340,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145002361.170, "dur": 49.699, + "args": { + "External id": 512341,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145002363.223, "dur": 47.153, + "args": { + "External id": 512342,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002368.991, "dur": 1.079, + "args": { + "External id": 512343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274145002371.308, "dur": 23.403, + "args": { + "External id": 512344,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274145002373.369, "dur": 21.027, + "args": { + "External id": 512345,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 20381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145002376.043, "dur": 2.824, + "args": { + "External id": 512346,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145002379.963, "dur": 14.017, + "args": { + "External id": 512347,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 20383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274145002415.493, "dur": 42164.564, + "args": { + "External id": 512348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274145002417.123, "dur": 42162.086, + "args": { + "External id": 512349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145044601.050, "dur": 10.147, + "args": { + "External id": 512350,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145044607.927, "dur": 1.174, + "args": { + "External id": 512351,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145044616.756, "dur": 104.013, + "args": { + "External id": 512352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145044618.048, "dur": 7.004, + "args": { + "External id": 512353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145044620.534, "dur": 3.331, + "args": { + "External id": 512354,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145044622.700, "dur": 0.874, + "args": { + "External id": 512355,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145044626.632, "dur": 93.442, + "args": { + "External id": 512356,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145044628.397, "dur": 90.685, + "args": { + "External id": 512357,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145044727.197, "dur": 4.318, + "args": { + "External id": 512358,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145044729.689, "dur": 0.522, + "args": { + "External id": 512359,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145044739.482, "dur": 2.219, + "args": { + "External id": 512360,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145044752.402, "dur": 6.474, + "args": { + "External id": 512361,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145044754.683, "dur": 3.921, + "args": { + "External id": 512362,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145044893.609, "dur": 244.069, + "args": { + "External id": 512363,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145044897.596, "dur": 4.121, + "args": { + "External id": 512364,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145044903.506, "dur": 233.586, + "args": { + "External id": 512365,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145044905.241, "dur": 0.599, + "args": { + "External id": 512366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145044907.518, "dur": 26.755, + "args": { + "External id": 512367,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145044936.148, "dur": 3.604, + "args": { + "External id": 512368,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145044938.576, "dur": 0.884, + "args": { + "External id": 512369,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145044940.950, "dur": 27.872, + "args": { + "External id": 512370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145044942.829, "dur": 1.182, + "args": { + "External id": 512371,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145044945.331, "dur": 23.208, + "args": { + "External id": 512372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145044953.358, "dur": 2.891, + "args": { + "External id": 512373,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145044970.489, "dur": 22.015, + "args": { + "External id": 512374,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145044994.721, "dur": 14.531, + "args": { + "External id": 512375,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145045012.625, "dur": 17.277, + "args": { + "External id": 512376,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045031.595, "dur": 14.007, + "args": { + "External id": 512377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145045047.433, "dur": 57.493, + "args": { + "External id": 512378,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045049.868, "dur": 1.857, + "args": { + "External id": 512379,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045085.293, "dur": 0.778, + "args": { + "External id": 512380,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045109.174, "dur": 13.781, + "args": { + "External id": 512381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045124.278, "dur": 11.588, + "args": { + "External id": 512382,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145045146.968, "dur": 2.456, + "args": { + "External id": 512383,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145045156.811, "dur": 4.290, + "args": { + "External id": 512384,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045159.658, "dur": 0.544, + "args": { + "External id": 512385,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145045241.672, "dur": 70.157, + "args": { + "External id": 512386,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145045317.699, "dur": 9.053, + "args": { + "External id": 512387,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045323.102, "dur": 1.005, + "args": { + "External id": 512388,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045328.653, "dur": 28.916, + "args": { + "External id": 512389,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145045363.308, "dur": 6.072, + "args": { + "External id": 512390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145045365.063, "dur": 3.619, + "args": { + "External id": 512391,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045367.557, "dur": 0.905, + "args": { + "External id": 512392,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145045372.861, "dur": 45.575, + "args": { + "External id": 512393,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145045374.330, "dur": 43.616, + "args": { + "External id": 512394,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045425.029, "dur": 17.716, + "args": { + "External id": 512395,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145045449.308, "dur": 3.622, + "args": { + "External id": 512396,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045451.436, "dur": 0.640, + "args": { + "External id": 512397,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145045457.326, "dur": 56.112, + "args": { + "External id": 512398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145045458.538, "dur": 9.818, + "args": { + "External id": 512399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145045459.437, "dur": 8.228, + "args": { + "External id": 512400,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045463.378, "dur": 4.073, + "args": { + "External id": 512401,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145045472.656, "dur": 40.425, + "args": { + "External id": 512402,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145045473.449, "dur": 39.005, + "args": { + "External id": 512403,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145045517.248, "dur": 3.947, + "args": { + "External id": 512404,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045519.355, "dur": 0.553, + "args": { + "External id": 512405,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145045527.374, "dur": 1.463, + "args": { + "External id": 512406,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145045539.865, "dur": 7.097, + "args": { + "External id": 512407,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145045542.252, "dur": 4.464, + "args": { + "External id": 512408,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145045662.696, "dur": 197.299, + "args": { + "External id": 512409,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145045667.715, "dur": 3.377, + "args": { + "External id": 512410,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145045672.997, "dur": 186.293, + "args": { + "External id": 512411,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145045674.343, "dur": 0.544, + "args": { + "External id": 512412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145045676.610, "dur": 23.070, + "args": { + "External id": 512413,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145045704.018, "dur": 5.417, + "args": { + "External id": 512414,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045708.096, "dur": 1.080, + "args": { + "External id": 512415,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145045710.594, "dur": 23.084, + "args": { + "External id": 512416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145045711.875, "dur": 1.429, + "args": { + "External id": 512417,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145045714.534, "dur": 18.858, + "args": { + "External id": 512418,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045717.906, "dur": 2.309, + "args": { + "External id": 512419,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145045735.203, "dur": 23.841, + "args": { + "External id": 512420,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045760.708, "dur": 17.643, + "args": { + "External id": 512421,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145045781.387, "dur": 13.982, + "args": { + "External id": 512422,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045796.952, "dur": 12.880, + "args": { + "External id": 512423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145045811.827, "dur": 21.490, + "args": { + "External id": 512424,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045816.097, "dur": 1.557, + "args": { + "External id": 512425,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045819.686, "dur": 0.893, + "args": { + "External id": 512426,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045834.918, "dur": 10.950, + "args": { + "External id": 512427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145045847.033, "dur": 10.929, + "args": { + "External id": 512428,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145045867.402, "dur": 1.997, + "args": { + "External id": 512429,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145045879.355, "dur": 4.011, + "args": { + "External id": 512430,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145045881.956, "dur": 0.592, + "args": { + "External id": 512431,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145045956.664, "dur": 56.295, + "args": { + "External id": 512432,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145046020.732, "dur": 4.958, + "args": { + "External id": 512433,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046023.530, "dur": 0.969, + "args": { + "External id": 512434,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046027.379, "dur": 58.037, + "args": { + "External id": 512435,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145046093.481, "dur": 7.217, + "args": { + "External id": 512436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145046095.171, "dur": 4.562, + "args": { + "External id": 512437,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046098.017, "dur": 1.486, + "args": { + "External id": 512438,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145046106.189, "dur": 49.484, + "args": { + "External id": 512439,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145046107.744, "dur": 47.383, + "args": { + "External id": 512440,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046159.864, "dur": 17.202, + "args": { + "External id": 512441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145046183.282, "dur": 4.010, + "args": { + "External id": 512442,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046185.698, "dur": 0.523, + "args": { + "External id": 512443,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145046191.663, "dur": 48.810, + "args": { + "External id": 512444,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145046193.261, "dur": 6.793, + "args": { + "External id": 512445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145046196.386, "dur": 3.050, + "args": { + "External id": 512446,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046198.650, "dur": 0.658, + "args": { + "External id": 512447,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145046200.626, "dur": 39.513, + "args": { + "External id": 512448,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145046201.722, "dur": 37.895, + "args": { + "External id": 512449,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145046245.019, "dur": 4.058, + "args": { + "External id": 512450,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046247.433, "dur": 0.530, + "args": { + "External id": 512451,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145046255.669, "dur": 1.886, + "args": { + "External id": 512452,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145046268.552, "dur": 9.571, + "args": { + "External id": 512453,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145046270.766, "dur": 7.067, + "args": { + "External id": 512454,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145046366.305, "dur": 183.000, + "args": { + "External id": 512455,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145046368.618, "dur": 1.941, + "args": { + "External id": 512456,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145046371.999, "dur": 176.820, + "args": { + "External id": 512457,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145046373.642, "dur": 0.462, + "args": { + "External id": 512458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145046375.844, "dur": 22.246, + "args": { + "External id": 512459,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145046401.968, "dur": 4.458, + "args": { + "External id": 512460,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046405.444, "dur": 0.731, + "args": { + "External id": 512461,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145046407.259, "dur": 23.119, + "args": { + "External id": 512462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145046408.212, "dur": 1.256, + "args": { + "External id": 512463,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145046410.685, "dur": 19.411, + "args": { + "External id": 512464,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046415.358, "dur": 2.971, + "args": { + "External id": 512465,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145046431.830, "dur": 18.552, + "args": { + "External id": 512466,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046451.902, "dur": 13.043, + "args": { + "External id": 512467,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145046467.383, "dur": 12.860, + "args": { + "External id": 512468,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046481.883, "dur": 11.511, + "args": { + "External id": 512469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145046497.364, "dur": 18.587, + "args": { + "External id": 512470,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046499.296, "dur": 1.428, + "args": { + "External id": 512471,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046502.837, "dur": 0.647, + "args": { + "External id": 512472,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046517.490, "dur": 13.498, + "args": { + "External id": 512473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046532.315, "dur": 15.619, + "args": { + "External id": 512474,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145046555.687, "dur": 1.914, + "args": { + "External id": 512475,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145046568.554, "dur": 3.792, + "args": { + "External id": 512476,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046570.897, "dur": 0.575, + "args": { + "External id": 512477,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145046662.238, "dur": 57.618, + "args": { + "External id": 512478,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145046725.051, "dur": 6.084, + "args": { + "External id": 512479,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046728.568, "dur": 1.179, + "args": { + "External id": 512480,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046732.730, "dur": 33.328, + "args": { + "External id": 512481,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145046771.058, "dur": 6.724, + "args": { + "External id": 512482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145046772.638, "dur": 4.519, + "args": { + "External id": 512483,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046776.191, "dur": 0.756, + "args": { + "External id": 512484,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145046782.847, "dur": 78.375, + "args": { + "External id": 512485,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145046784.229, "dur": 76.271, + "args": { + "External id": 512486,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145046865.226, "dur": 28.916, + "args": { + "External id": 512487,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145046900.215, "dur": 4.024, + "args": { + "External id": 512488,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046902.715, "dur": 0.760, + "args": { + "External id": 512489,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145046910.747, "dur": 48.107, + "args": { + "External id": 512490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145046911.911, "dur": 5.575, + "args": { + "External id": 512491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145046912.733, "dur": 4.187, + "args": { + "External id": 512492,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046914.457, "dur": 2.296, + "args": { + "External id": 512493,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145046918.114, "dur": 40.291, + "args": { + "External id": 512494,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145046919.086, "dur": 38.717, + "args": { + "External id": 512495,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145046965.246, "dur": 3.606, + "args": { + "External id": 512496,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145046967.300, "dur": 0.442, + "args": { + "External id": 512497,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145046975.258, "dur": 1.538, + "args": { + "External id": 512498,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145046984.313, "dur": 8.108, + "args": { + "External id": 512499,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145046986.667, "dur": 5.447, + "args": { + "External id": 512500,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145047120.721, "dur": 203.756, + "args": { + "External id": 512501,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145047123.614, "dur": 3.358, + "args": { + "External id": 512502,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145047128.469, "dur": 195.544, + "args": { + "External id": 512503,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145047130.022, "dur": 0.697, + "args": { + "External id": 512504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145047134.371, "dur": 21.660, + "args": { + "External id": 512505,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145047157.712, "dur": 3.555, + "args": { + "External id": 512506,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047160.150, "dur": 0.873, + "args": { + "External id": 512507,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145047162.010, "dur": 24.535, + "args": { + "External id": 512508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145047163.398, "dur": 2.536, + "args": { + "External id": 512509,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145047167.489, "dur": 18.814, + "args": { + "External id": 512510,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047173.091, "dur": 2.314, + "args": { + "External id": 512511,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145047187.968, "dur": 20.249, + "args": { + "External id": 512512,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047232.502, "dur": 13.476, + "args": { + "External id": 512513,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145047248.939, "dur": 12.317, + "args": { + "External id": 512514,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047262.762, "dur": 11.815, + "args": { + "External id": 512515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145047276.738, "dur": 19.035, + "args": { + "External id": 512516,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047278.709, "dur": 1.562, + "args": { + "External id": 512517,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047282.337, "dur": 0.380, + "args": { + "External id": 512518,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047299.356, "dur": 10.868, + "args": { + "External id": 512519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047311.209, "dur": 11.727, + "args": { + "External id": 512520,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145047331.829, "dur": 1.965, + "args": { + "External id": 512521,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145047343.551, "dur": 4.140, + "args": { + "External id": 512522,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047346.354, "dur": 0.455, + "args": { + "External id": 512523,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145047417.227, "dur": 57.617, + "args": { + "External id": 512524,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145047480.464, "dur": 6.167, + "args": { + "External id": 512525,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047483.418, "dur": 2.111, + "args": { + "External id": 512526,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047490.564, "dur": 23.819, + "args": { + "External id": 512527,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145047518.861, "dur": 4.841, + "args": { + "External id": 512528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145047520.229, "dur": 2.779, + "args": { + "External id": 512529,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047522.156, "dur": 0.655, + "args": { + "External id": 512530,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145047526.311, "dur": 41.421, + "args": { + "External id": 512531,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145047527.309, "dur": 39.797, + "args": { + "External id": 512532,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047571.539, "dur": 13.828, + "args": { + "External id": 512533,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145047611.800, "dur": 5.330, + "args": { + "External id": 512534,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047614.872, "dur": 1.001, + "args": { + "External id": 512535,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145047622.025, "dur": 52.989, + "args": { + "External id": 512536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145047623.133, "dur": 4.087, + "args": { + "External id": 512537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145047624.029, "dur": 2.596, + "args": { + "External id": 512538,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047625.845, "dur": 0.647, + "args": { + "External id": 512539,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145047630.200, "dur": 44.366, + "args": { + "External id": 512540,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145047630.832, "dur": 43.208, + "args": { + "External id": 512541,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145047679.810, "dur": 3.768, + "args": { + "External id": 512542,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047681.876, "dur": 0.476, + "args": { + "External id": 512543,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145047689.986, "dur": 1.461, + "args": { + "External id": 512544,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145047699.667, "dur": 6.847, + "args": { + "External id": 512545,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145047702.000, "dur": 4.194, + "args": { + "External id": 512546,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145047797.259, "dur": 193.725, + "args": { + "External id": 512547,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145047799.534, "dur": 3.792, + "args": { + "External id": 512548,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145047805.073, "dur": 185.379, + "args": { + "External id": 512549,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145047806.379, "dur": 0.325, + "args": { + "External id": 512550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145047808.192, "dur": 21.918, + "args": { + "External id": 512551,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145047831.315, "dur": 5.451, + "args": { + "External id": 512552,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047835.893, "dur": 0.617, + "args": { + "External id": 512553,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145047837.685, "dur": 22.924, + "args": { + "External id": 512554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145047838.438, "dur": 1.303, + "args": { + "External id": 512555,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145047843.419, "dur": 16.857, + "args": { + "External id": 512556,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047846.470, "dur": 2.729, + "args": { + "External id": 512557,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145047862.036, "dur": 21.306, + "args": { + "External id": 512558,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047884.677, "dur": 20.130, + "args": { + "External id": 512559,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145047907.674, "dur": 14.614, + "args": { + "External id": 512560,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047923.646, "dur": 14.562, + "args": { + "External id": 512561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145047939.652, "dur": 24.520, + "args": { + "External id": 512562,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047941.907, "dur": 1.259, + "args": { + "External id": 512563,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145047947.353, "dur": 2.203, + "args": { + "External id": 512564,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047965.657, "dur": 11.697, + "args": { + "External id": 512565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145047978.223, "dur": 11.138, + "args": { + "External id": 512566,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145047997.601, "dur": 1.874, + "args": { + "External id": 512567,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145048007.619, "dur": 3.587, + "args": { + "External id": 512568,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048009.936, "dur": 0.476, + "args": { + "External id": 512569,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145048116.470, "dur": 62.184, + "args": { + "External id": 512570,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145048184.622, "dur": 8.455, + "args": { + "External id": 512571,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048190.487, "dur": 0.936, + "args": { + "External id": 512572,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048194.383, "dur": 28.587, + "args": { + "External id": 512573,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145048227.825, "dur": 13.032, + "args": { + "External id": 512574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145048235.505, "dur": 4.631, + "args": { + "External id": 512575,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048237.404, "dur": 2.502, + "args": { + "External id": 512576,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145048243.792, "dur": 45.529, + "args": { + "External id": 512577,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145048247.148, "dur": 41.633, + "args": { + "External id": 512578,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048293.191, "dur": 14.653, + "args": { + "External id": 512579,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145048313.977, "dur": 3.967, + "args": { + "External id": 512580,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048316.468, "dur": 0.656, + "args": { + "External id": 512581,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145048322.297, "dur": 46.615, + "args": { + "External id": 512582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145048323.395, "dur": 5.874, + "args": { + "External id": 512583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145048324.184, "dur": 4.419, + "args": { + "External id": 512584,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048327.796, "dur": 0.656, + "args": { + "External id": 512585,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145048329.845, "dur": 38.739, + "args": { + "External id": 512586,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145048330.222, "dur": 37.775, + "args": { + "External id": 512587,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145048372.471, "dur": 4.367, + "args": { + "External id": 512588,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048375.082, "dur": 0.597, + "args": { + "External id": 512589,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145048383.451, "dur": 1.514, + "args": { + "External id": 512590,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145048395.051, "dur": 7.761, + "args": { + "External id": 512591,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145048396.975, "dur": 5.562, + "args": { + "External id": 512592,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145048493.294, "dur": 203.860, + "args": { + "External id": 512593,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145048495.145, "dur": 2.072, + "args": { + "External id": 512594,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145048500.573, "dur": 196.091, + "args": { + "External id": 512595,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145048501.692, "dur": 0.350, + "args": { + "External id": 512596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145048505.564, "dur": 21.591, + "args": { + "External id": 512597,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145048528.707, "dur": 5.237, + "args": { + "External id": 512598,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048531.198, "dur": 2.530, + "args": { + "External id": 512599,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145048534.792, "dur": 23.077, + "args": { + "External id": 512600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145048535.931, "dur": 0.975, + "args": { + "External id": 512601,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145048538.092, "dur": 19.514, + "args": { + "External id": 512602,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048541.166, "dur": 2.349, + "args": { + "External id": 512603,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145048559.191, "dur": 19.681, + "args": { + "External id": 512604,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048580.448, "dur": 33.235, + "args": { + "External id": 512605,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145048617.456, "dur": 14.097, + "args": { + "External id": 512606,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048633.033, "dur": 11.760, + "args": { + "External id": 512607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145048649.087, "dur": 20.676, + "args": { + "External id": 512608,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048651.691, "dur": 1.907, + "args": { + "External id": 512609,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048655.727, "dur": 0.796, + "args": { + "External id": 512610,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048671.237, "dur": 11.411, + "args": { + "External id": 512611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048683.779, "dur": 11.500, + "args": { + "External id": 512612,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145048705.144, "dur": 2.157, + "args": { + "External id": 512613,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145048716.645, "dur": 3.455, + "args": { + "External id": 512614,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048719.000, "dur": 0.399, + "args": { + "External id": 512615,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145048789.261, "dur": 55.997, + "args": { + "External id": 512616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145048850.230, "dur": 6.668, + "args": { + "External id": 512617,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048853.318, "dur": 2.484, + "args": { + "External id": 512618,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048858.457, "dur": 25.200, + "args": { + "External id": 512619,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145048888.293, "dur": 7.869, + "args": { + "External id": 512620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145048889.929, "dur": 5.413, + "args": { + "External id": 512621,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048894.309, "dur": 0.795, + "args": { + "External id": 512622,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145048898.840, "dur": 41.700, + "args": { + "External id": 512623,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145048899.998, "dur": 39.855, + "args": { + "External id": 512624,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145048944.065, "dur": 15.595, + "args": { + "External id": 512625,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145048965.075, "dur": 3.678, + "args": { + "External id": 512626,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048967.524, "dur": 0.388, + "args": { + "External id": 512627,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145048972.585, "dur": 44.570, + "args": { + "External id": 512628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145048975.662, "dur": 3.366, + "args": { + "External id": 512629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145048976.304, "dur": 2.170, + "args": { + "External id": 512630,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145048977.875, "dur": 0.468, + "args": { + "External id": 512631,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145048979.642, "dur": 37.158, + "args": { + "External id": 512632,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145048980.340, "dur": 35.863, + "args": { + "External id": 512633,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145049021.100, "dur": 5.762, + "args": { + "External id": 512634,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049023.755, "dur": 1.874, + "args": { + "External id": 512635,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145049032.469, "dur": 1.261, + "args": { + "External id": 512636,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049043.446, "dur": 5.687, + "args": { + "External id": 512637,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049045.163, "dur": 3.721, + "args": { + "External id": 512638,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145049171.642, "dur": 192.267, + "args": { + "External id": 512639,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049174.242, "dur": 4.442, + "args": { + "External id": 512640,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145049180.362, "dur": 182.992, + "args": { + "External id": 512641,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145049181.751, "dur": 0.362, + "args": { + "External id": 512642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145049185.682, "dur": 23.358, + "args": { + "External id": 512643,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145049210.685, "dur": 5.720, + "args": { + "External id": 512644,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049215.011, "dur": 1.158, + "args": { + "External id": 512645,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145049217.316, "dur": 22.732, + "args": { + "External id": 512646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049218.879, "dur": 1.023, + "args": { + "External id": 512647,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145049221.171, "dur": 18.529, + "args": { + "External id": 512648,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049223.986, "dur": 3.113, + "args": { + "External id": 512649,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145049241.500, "dur": 21.278, + "args": { + "External id": 512650,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049264.336, "dur": 12.276, + "args": { + "External id": 512651,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145049279.509, "dur": 12.859, + "args": { + "External id": 512652,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049298.478, "dur": 12.564, + "args": { + "External id": 512653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145049312.624, "dur": 22.081, + "args": { + "External id": 512654,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049316.485, "dur": 1.639, + "args": { + "External id": 512655,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049320.232, "dur": 0.659, + "args": { + "External id": 512656,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049336.162, "dur": 12.970, + "args": { + "External id": 512657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049350.376, "dur": 12.013, + "args": { + "External id": 512658,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145049372.644, "dur": 2.116, + "args": { + "External id": 512659,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145049383.719, "dur": 4.228, + "args": { + "External id": 512660,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049386.638, "dur": 0.583, + "args": { + "External id": 512661,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145049448.932, "dur": 54.535, + "args": { + "External id": 512662,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145049508.551, "dur": 4.883, + "args": { + "External id": 512663,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049511.714, "dur": 0.713, + "args": { + "External id": 512664,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049514.832, "dur": 25.836, + "args": { + "External id": 512665,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145049544.767, "dur": 9.207, + "args": { + "External id": 512666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145049549.494, "dur": 3.880, + "args": { + "External id": 512667,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049552.013, "dur": 1.199, + "args": { + "External id": 512668,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145049556.309, "dur": 58.474, + "args": { + "External id": 512669,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145049557.436, "dur": 56.314, + "args": { + "External id": 512670,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049620.062, "dur": 15.985, + "args": { + "External id": 512671,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145049642.685, "dur": 6.789, + "args": { + "External id": 512672,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049647.436, "dur": 0.849, + "args": { + "External id": 512673,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338711, "tid": 2338711, + "ts": 6274145049653.404, "dur": 48.688, + "args": { + "External id": 512674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145049654.601, "dur": 4.170, + "args": { + "External id": 512675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145049655.816, "dur": 2.348, + "args": { + "External id": 512676,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049657.536, "dur": 0.493, + "args": { + "External id": 512677,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145049659.408, "dur": 42.257, + "args": { + "External id": 512678,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145049660.416, "dur": 40.576, + "args": { + "External id": 512679,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145049707.975, "dur": 3.698, + "args": { + "External id": 512680,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049709.961, "dur": 0.620, + "args": { + "External id": 512681,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145049717.674, "dur": 1.764, + "args": { + "External id": 512682,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049727.378, "dur": 6.654, + "args": { + "External id": 512683,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049729.745, "dur": 4.040, + "args": { + "External id": 512684,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145049817.485, "dur": 173.341, + "args": { + "External id": 512685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049821.056, "dur": 3.444, + "args": { + "External id": 512686,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338711, "tid": 2338711, + "ts": 6274145049825.942, "dur": 164.409, + "args": { + "External id": 512687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338711, "tid": 2338711, + "ts": 6274145049827.458, "dur": 0.334, + "args": { + "External id": 512688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338711, "tid": 2338711, + "ts": 6274145049829.552, "dur": 18.311, + "args": { + "External id": 512689,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338711, "tid": 2338711, + "ts": 6274145049849.489, "dur": 4.516, + "args": { + "External id": 512690,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049852.996, "dur": 0.825, + "args": { + "External id": 512691,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145049854.847, "dur": 22.405, + "args": { + "External id": 512692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274145049856.062, "dur": 1.281, + "args": { + "External id": 512693,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274145049860.686, "dur": 16.319, + "args": { + "External id": 512694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049863.062, "dur": 2.326, + "args": { + "External id": 512695,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274145049878.477, "dur": 17.869, + "args": { + "External id": 512696,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049897.558, "dur": 14.884, + "args": { + "External id": 512697,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338711, "tid": 2338711, + "ts": 6274145049914.725, "dur": 12.076, + "args": { + "External id": 512698,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049928.221, "dur": 11.149, + "args": { + "External id": 512699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145049940.907, "dur": 19.174, + "args": { + "External id": 512700,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049943.570, "dur": 1.810, + "args": { + "External id": 512701,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145049947.147, "dur": 0.645, + "args": { + "External id": 512702,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049964.087, "dur": 12.557, + "args": { + "External id": 512703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145049977.541, "dur": 11.763, + "args": { + "External id": 512704,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274145049996.914, "dur": 1.691, + "args": { + "External id": 512705,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145050006.217, "dur": 3.013, + "args": { + "External id": 512706,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145050008.106, "dur": 0.388, + "args": { + "External id": 512707,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145050109.956, "dur": 55.597, + "args": { + "External id": 512708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338711, "tid": 2338711, + "ts": 6274145050171.674, "dur": 6.004, + "args": { + "External id": 512709,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145050175.148, "dur": 1.079, + "args": { + "External id": 512710,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145050180.975, "dur": 29.312, + "args": { + "External id": 512711,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338711, "tid": 2338711, + "ts": 6274145050215.073, "dur": 5.804, + "args": { + "External id": 512712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338711, "tid": 2338711, + "ts": 6274145050216.927, "dur": 3.282, + "args": { + "External id": 512713,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145050219.079, "dur": 0.914, + "args": { + "External id": 512714,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338711, "tid": 2338711, + "ts": 6274145050223.238, "dur": 41.418, + "args": { + "External id": 512715,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338711, "tid": 2338711, + "ts": 6274145050224.074, "dur": 39.999, + "args": { + "External id": 512716,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145050268.326, "dur": 15.941, + "args": { + "External id": 512717,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145050291.119, "dur": 26.356, + "args": { + "External id": 512718,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338711, "tid": 2338711, + "ts": 6274145050293.759, "dur": 23.345, + "args": { + "External id": 512719,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145050298.453, "dur": 1.950, + "args": { + "External id": 512720,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274145050322.603, "dur": 26.224, + "args": { + "External id": 512721,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338711, "tid": 2338711, + "ts": 6274145050324.533, "dur": 23.854, + "args": { + "External id": 512722,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145050329.565, "dur": 3.342, + "args": { + "External id": 512723,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274145050333.955, "dur": 13.973, + "args": { + "External id": 512724,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145050361.274, "dur": 7.660, + "args": { + "External id": 512725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145050365.760, "dur": 2.674, + "args": { + "External id": 512726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145050370.057, "dur": 1.390, + "args": { + "External id": 512727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338711, "tid": 2338711, + "ts": 6274145050370.895, "dur": 0.469, + "args": { + "External id": 512728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145050413.695, "dur": 22.762, + "args": { + "External id": 512729,"Sequence number": 5516463, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338711, "tid": 2338711, + "ts": 6274145050438.641, "dur": 13.336, + "args": { + "External id": 512730,"Sequence number": 5516464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20766 + } + }, + { + "ph": "s", "id": 226, "pid": 2338711, "tid": 2338711, "ts": 6274145050438.641, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338711, "tid": 2338711, + "ts": 6274145050565.079, "dur": 65.103, + "args": { + "External id": 512731,"Record function id": 0, "Ev Idx": 20767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2338711, + "ts": 6274145050735.423, "dur": 39.087, + "args": { + "External id": 512732,"Sequence number": 5516465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20768 + } + }, + { + "ph": "s", "id": 225, "pid": 2338711, "tid": 2338711, "ts": 6274145050735.423, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145050806.446, "dur": 26.828, + "args": { + "External id": 512733,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274145050808.304, "dur": 9.124, + "args": { + "External id": 512734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274145050813.104, "dur": 3.753, + "args": { + "External id": 512735,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274145050818.893, "dur": 13.958, + "args": { + "External id": 512736,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2338711, + "ts": 6274147150035.412, "dur": 89.712, + "args": { + "External id": 512737,"Sequence number": 5516466, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338711, "tid": 2338711, + "ts": 6274147150135.914, "dur": 33.702, + "args": { + "External id": 512738,"Sequence number": 5516467, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274147150180.550, "dur": 23.743, + "args": { + "External id": 512739,"Sequence number": 5516468, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274147150207.155, "dur": 105.907, + "args": { + "External id": 512740,"Sequence number": 5516469, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274147151367.849, "dur": 45.684, + "args": { + "External id": 512741,"Sequence number": 5516470, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274147151417.333, "dur": 16.125, + "args": { + "External id": 512742,"Sequence number": 5516471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274147151443.167, "dur": 14.998, + "args": { + "External id": 512743,"Sequence number": 5516472, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274147151460.446, "dur": 13.008, + "args": { + "External id": 512744,"Sequence number": 5516473, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338711, "tid": 2338711, + "ts": 6274147153642.389, "dur": 3507.208, + "args": { + "External id": 512745,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338711, "tid": 2338711, + "ts": 6274147154285.055, "dur": 1346.420, + "args": { + "External id": 512746,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338711, "tid": 2338711, + "ts": 6274147154308.329, "dur": 81.856, + "args": { + "External id": 512747,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274147154312.665, "dur": 17.043, + "args": { + "External id": 512748,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338711, "tid": 2338711, + "ts": 6274147154334.769, "dur": 55.008, + "args": { + "External id": 512749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[68250]], "Ev Idx": 20785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338711, "tid": 2338711, + "ts": 6274147154339.944, "dur": 49.063, + "args": { + "External id": 512750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[68250], []], "Ev Idx": 20786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157183.054, "dur": 4.947, + "args": { + "External id": 512751,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157190.184, "dur": 0.524, + "args": { + "External id": 512752,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157192.028, "dur": 0.382, + "args": { + "External id": 512753,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157193.528, "dur": 0.433, + "args": { + "External id": 512754,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157195.035, "dur": 0.484, + "args": { + "External id": 512755,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157197.011, "dur": 0.366, + "args": { + "External id": 512756,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157198.348, "dur": 0.607, + "args": { + "External id": 512757,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157201.946, "dur": 0.243, + "args": { + "External id": 512758,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157203.271, "dur": 0.367, + "args": { + "External id": 512759,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157204.619, "dur": 0.343, + "args": { + "External id": 512760,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157206.007, "dur": 0.237, + "args": { + "External id": 512761,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157207.108, "dur": 0.336, + "args": { + "External id": 512762,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157208.698, "dur": 0.233, + "args": { + "External id": 512763,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157209.735, "dur": 0.341, + "args": { + "External id": 512764,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157211.347, "dur": 0.231, + "args": { + "External id": 512765,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157214.505, "dur": 0.304, + "args": { + "External id": 512766,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157215.821, "dur": 0.374, + "args": { + "External id": 512767,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157217.479, "dur": 0.239, + "args": { + "External id": 512768,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157218.949, "dur": 0.221, + "args": { + "External id": 512769,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157220.282, "dur": 0.232, + "args": { + "External id": 512770,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157221.381, "dur": 0.227, + "args": { + "External id": 512771,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157222.857, "dur": 0.227, + "args": { + "External id": 512772,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157223.926, "dur": 0.228, + "args": { + "External id": 512773,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157227.563, "dur": 0.228, + "args": { + "External id": 512774,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157228.798, "dur": 0.243, + "args": { + "External id": 512775,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157230.141, "dur": 0.217, + "args": { + "External id": 512776,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157231.331, "dur": 0.237, + "args": { + "External id": 512777,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157232.424, "dur": 0.216, + "args": { + "External id": 512778,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157233.667, "dur": 0.232, + "args": { + "External id": 512779,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157234.676, "dur": 0.216, + "args": { + "External id": 512780,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157236.144, "dur": 0.368, + "args": { + "External id": 512781,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157239.915, "dur": 0.377, + "args": { + "External id": 512782,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157241.653, "dur": 0.366, + "args": { + "External id": 512783,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157243.237, "dur": 0.378, + "args": { + "External id": 512784,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157244.577, "dur": 0.352, + "args": { + "External id": 512785,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157245.999, "dur": 0.368, + "args": { + "External id": 512786,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157247.379, "dur": 0.221, + "args": { + "External id": 512787,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157248.711, "dur": 0.228, + "args": { + "External id": 512788,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157249.766, "dur": 0.370, + "args": { + "External id": 512789,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157252.868, "dur": 0.326, + "args": { + "External id": 512790,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157254.106, "dur": 0.238, + "args": { + "External id": 512791,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157255.406, "dur": 0.219, + "args": { + "External id": 512792,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157258.313, "dur": 0.245, + "args": { + "External id": 512793,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157259.557, "dur": 0.216, + "args": { + "External id": 512794,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157260.737, "dur": 0.221, + "args": { + "External id": 512795,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157261.732, "dur": 0.217, + "args": { + "External id": 512796,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157262.947, "dur": 0.221, + "args": { + "External id": 512797,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157266.497, "dur": 0.224, + "args": { + "External id": 512798,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157267.671, "dur": 0.221, + "args": { + "External id": 512799,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157268.665, "dur": 0.217, + "args": { + "External id": 512800,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157269.657, "dur": 0.223, + "args": { + "External id": 512801,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157270.875, "dur": 0.212, + "args": { + "External id": 512802,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157271.982, "dur": 0.219, + "args": { + "External id": 512803,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157273.172, "dur": 0.217, + "args": { + "External id": 512804,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157274.238, "dur": 0.221, + "args": { + "External id": 512805,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157277.782, "dur": 0.221, + "args": { + "External id": 512806,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157278.805, "dur": 0.221, + "args": { + "External id": 512807,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157279.857, "dur": 0.215, + "args": { + "External id": 512808,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157280.873, "dur": 0.222, + "args": { + "External id": 512809,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157282.108, "dur": 0.218, + "args": { + "External id": 512810,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157283.244, "dur": 0.220, + "args": { + "External id": 512811,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157284.235, "dur": 0.333, + "args": { + "External id": 512812,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157285.599, "dur": 0.524, + "args": { + "External id": 512813,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157289.072, "dur": 0.232, + "args": { + "External id": 512814,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157290.085, "dur": 0.348, + "args": { + "External id": 512815,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157291.624, "dur": 0.351, + "args": { + "External id": 512816,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157292.929, "dur": 0.369, + "args": { + "External id": 512817,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157294.208, "dur": 0.368, + "args": { + "External id": 512818,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157295.529, "dur": 0.220, + "args": { + "External id": 512819,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157296.533, "dur": 0.328, + "args": { + "External id": 512820,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157297.625, "dur": 0.220, + "args": { + "External id": 512821,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157300.889, "dur": 0.216, + "args": { + "External id": 512822,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157301.961, "dur": 0.223, + "args": { + "External id": 512823,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157303.135, "dur": 0.212, + "args": { + "External id": 512824,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157304.168, "dur": 0.364, + "args": { + "External id": 512825,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157305.429, "dur": 0.381, + "args": { + "External id": 512826,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157306.635, "dur": 0.230, + "args": { + "External id": 512827,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157308.584, "dur": 0.406, + "args": { + "External id": 512828,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157309.888, "dur": 0.452, + "args": { + "External id": 512829,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157313.738, "dur": 0.335, + "args": { + "External id": 512830,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157314.890, "dur": 0.324, + "args": { + "External id": 512831,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157316.158, "dur": 0.369, + "args": { + "External id": 512832,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157317.322, "dur": 0.350, + "args": { + "External id": 512833,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157318.565, "dur": 0.359, + "args": { + "External id": 512834,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157319.839, "dur": 0.233, + "args": { + "External id": 512835,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157321.130, "dur": 0.285, + "args": { + "External id": 512836,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157322.572, "dur": 0.221, + "args": { + "External id": 512837,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157326.100, "dur": 0.223, + "args": { + "External id": 512838,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157327.285, "dur": 0.366, + "args": { + "External id": 512839,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157328.435, "dur": 0.355, + "args": { + "External id": 512840,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157329.672, "dur": 0.380, + "args": { + "External id": 512841,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157331.149, "dur": 0.218, + "args": { + "External id": 512842,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157332.215, "dur": 0.223, + "args": { + "External id": 512843,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157333.926, "dur": 0.249, + "args": { + "External id": 512844,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157335.025, "dur": 0.229, + "args": { + "External id": 512845,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157338.240, "dur": 0.226, + "args": { + "External id": 512846,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157339.547, "dur": 0.220, + "args": { + "External id": 512847,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157340.875, "dur": 0.216, + "args": { + "External id": 512848,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157341.941, "dur": 0.220, + "args": { + "External id": 512849,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157342.972, "dur": 0.216, + "args": { + "External id": 512850,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157344.029, "dur": 0.231, + "args": { + "External id": 512851,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157345.032, "dur": 0.371, + "args": { + "External id": 512852,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157346.183, "dur": 0.333, + "args": { + "External id": 512853,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157348.899, "dur": 0.332, + "args": { + "External id": 512854,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157349.996, "dur": 0.372, + "args": { + "External id": 512855,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157351.163, "dur": 0.363, + "args": { + "External id": 512856,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157352.366, "dur": 0.293, + "args": { + "External id": 512857,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157353.485, "dur": 0.374, + "args": { + "External id": 512858,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157354.815, "dur": 0.220, + "args": { + "External id": 512859,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157356.425, "dur": 0.341, + "args": { + "External id": 512860,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157357.732, "dur": 0.353, + "args": { + "External id": 512861,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157361.304, "dur": 0.331, + "args": { + "External id": 512862,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157362.411, "dur": 0.219, + "args": { + "External id": 512863,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157363.436, "dur": 0.216, + "args": { + "External id": 512864,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157364.432, "dur": 0.221, + "args": { + "External id": 512865,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157365.535, "dur": 0.222, + "args": { + "External id": 512866,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157366.629, "dur": 0.222, + "args": { + "External id": 512867,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157367.825, "dur": 0.216, + "args": { + "External id": 512868,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157368.864, "dur": 0.221, + "args": { + "External id": 512869,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157372.547, "dur": 0.225, + "args": { + "External id": 512870,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157373.537, "dur": 0.222, + "args": { + "External id": 512871,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157374.547, "dur": 0.218, + "args": { + "External id": 512872,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157375.612, "dur": 0.220, + "args": { + "External id": 512873,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157376.752, "dur": 0.281, + "args": { + "External id": 512874,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157377.866, "dur": 0.225, + "args": { + "External id": 512875,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157378.888, "dur": 0.216, + "args": { + "External id": 512876,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157379.950, "dur": 0.309, + "args": { + "External id": 512877,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157383.324, "dur": 0.229, + "args": { + "External id": 512878,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157384.528, "dur": 0.347, + "args": { + "External id": 512879,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157386.199, "dur": 0.305, + "args": { + "External id": 512880,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157387.459, "dur": 0.362, + "args": { + "External id": 512881,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157388.774, "dur": 0.383, + "args": { + "External id": 512882,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157390.072, "dur": 0.220, + "args": { + "External id": 512883,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157391.064, "dur": 0.499, + "args": { + "External id": 512884,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157392.328, "dur": 0.321, + "args": { + "External id": 512885,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157395.258, "dur": 0.352, + "args": { + "External id": 512886,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157396.545, "dur": 0.220, + "args": { + "External id": 512887,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157397.562, "dur": 0.218, + "args": { + "External id": 512888,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157398.546, "dur": 0.224, + "args": { + "External id": 512889,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157399.648, "dur": 0.219, + "args": { + "External id": 512890,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157400.710, "dur": 0.220, + "args": { + "External id": 512891,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157401.726, "dur": 0.216, + "args": { + "External id": 512892,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157402.954, "dur": 0.222, + "args": { + "External id": 512893,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157406.187, "dur": 0.217, + "args": { + "External id": 512894,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157407.324, "dur": 0.220, + "args": { + "External id": 512895,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157408.522, "dur": 0.216, + "args": { + "External id": 512896,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157409.497, "dur": 0.220, + "args": { + "External id": 512897,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157410.597, "dur": 0.216, + "args": { + "External id": 512898,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157411.601, "dur": 0.221, + "args": { + "External id": 512899,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157413.239, "dur": 0.220, + "args": { + "External id": 512900,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157414.251, "dur": 0.388, + "args": { + "External id": 512901,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157417.506, "dur": 0.333, + "args": { + "External id": 512902,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157418.610, "dur": 0.234, + "args": { + "External id": 512903,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157419.618, "dur": 0.355, + "args": { + "External id": 512904,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157420.767, "dur": 0.220, + "args": { + "External id": 512905,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157421.755, "dur": 0.337, + "args": { + "External id": 512906,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157424.153, "dur": 0.234, + "args": { + "External id": 512907,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157425.238, "dur": 0.330, + "args": { + "External id": 512908,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157426.390, "dur": 0.331, + "args": { + "External id": 512909,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157429.513, "dur": 0.189, + "args": { + "External id": 512910,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157430.500, "dur": 0.361, + "args": { + "External id": 512911,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157431.666, "dur": 0.365, + "args": { + "External id": 512912,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157432.992, "dur": 0.338, + "args": { + "External id": 512913,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157434.108, "dur": 0.330, + "args": { + "External id": 512914,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157435.390, "dur": 0.220, + "args": { + "External id": 512915,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157436.450, "dur": 0.332, + "args": { + "External id": 512916,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157437.713, "dur": 0.368, + "args": { + "External id": 512917,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157441.086, "dur": 0.222, + "args": { + "External id": 512918,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157442.257, "dur": 0.221, + "args": { + "External id": 512919,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157443.324, "dur": 0.225, + "args": { + "External id": 512920,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157444.507, "dur": 0.221, + "args": { + "External id": 512921,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157445.713, "dur": 0.213, + "args": { + "External id": 512922,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157446.845, "dur": 0.219, + "args": { + "External id": 512923,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157448.311, "dur": 0.222, + "args": { + "External id": 512924,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157449.400, "dur": 0.228, + "args": { + "External id": 512925,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157452.887, "dur": 0.419, + "args": { + "External id": 512926,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157454.133, "dur": 0.389, + "args": { + "External id": 512927,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157455.400, "dur": 0.368, + "args": { + "External id": 512928,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157456.546, "dur": 0.347, + "args": { + "External id": 512929,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157457.699, "dur": 0.359, + "args": { + "External id": 512930,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157458.891, "dur": 0.230, + "args": { + "External id": 512931,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157459.933, "dur": 0.326, + "args": { + "External id": 512932,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157461.046, "dur": 0.335, + "args": { + "External id": 512933,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157464.292, "dur": 0.283, + "args": { + "External id": 512934,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157465.374, "dur": 0.340, + "args": { + "External id": 512935,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157466.525, "dur": 0.347, + "args": { + "External id": 512936,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157467.660, "dur": 0.339, + "args": { + "External id": 512937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157468.768, "dur": 0.218, + "args": { + "External id": 512938,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157469.760, "dur": 0.220, + "args": { + "External id": 512939,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157470.808, "dur": 0.216, + "args": { + "External id": 512940,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157476.089, "dur": 0.240, + "args": { + "External id": 512941,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157479.262, "dur": 0.231, + "args": { + "External id": 512942,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157480.346, "dur": 0.220, + "args": { + "External id": 512943,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157481.390, "dur": 0.215, + "args": { + "External id": 512944,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157482.419, "dur": 0.220, + "args": { + "External id": 512945,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157483.413, "dur": 0.216, + "args": { + "External id": 512946,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157484.495, "dur": 0.231, + "args": { + "External id": 512947,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157485.580, "dur": 0.217, + "args": { + "External id": 512948,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157486.576, "dur": 0.221, + "args": { + "External id": 512949,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157489.417, "dur": 0.218, + "args": { + "External id": 512950,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157490.401, "dur": 0.223, + "args": { + "External id": 512951,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157491.846, "dur": 0.221, + "args": { + "External id": 512952,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157492.872, "dur": 0.227, + "args": { + "External id": 512953,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157493.949, "dur": 0.358, + "args": { + "External id": 512954,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157495.154, "dur": 0.226, + "args": { + "External id": 512955,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157496.230, "dur": 0.325, + "args": { + "External id": 512956,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157497.446, "dur": 0.333, + "args": { + "External id": 512957,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157500.743, "dur": 0.337, + "args": { + "External id": 512958,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157502.040, "dur": 0.220, + "args": { + "External id": 512959,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157503.084, "dur": 0.217, + "args": { + "External id": 512960,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157504.180, "dur": 0.227, + "args": { + "External id": 512961,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157505.200, "dur": 0.215, + "args": { + "External id": 512962,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157506.222, "dur": 0.220, + "args": { + "External id": 512963,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157507.220, "dur": 0.215, + "args": { + "External id": 512964,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157508.218, "dur": 0.234, + "args": { + "External id": 512965,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157511.180, "dur": 0.216, + "args": { + "External id": 512966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157512.175, "dur": 0.222, + "args": { + "External id": 512967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157513.200, "dur": 0.217, + "args": { + "External id": 512968,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157514.237, "dur": 0.220, + "args": { + "External id": 512969,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157515.235, "dur": 0.218, + "args": { + "External id": 512970,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157516.247, "dur": 0.220, + "args": { + "External id": 512971,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157517.300, "dur": 0.217, + "args": { + "External id": 512972,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157518.307, "dur": 0.220, + "args": { + "External id": 512973,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157521.231, "dur": 0.218, + "args": { + "External id": 512974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157522.240, "dur": 0.221, + "args": { + "External id": 512975,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157523.250, "dur": 0.217, + "args": { + "External id": 512976,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157524.266, "dur": 0.221, + "args": { + "External id": 512977,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157525.297, "dur": 0.380, + "args": { + "External id": 512978,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157526.485, "dur": 0.220, + "args": { + "External id": 512979,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157527.477, "dur": 0.218, + "args": { + "External id": 512980,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157528.477, "dur": 0.351, + "args": { + "External id": 512981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157531.800, "dur": 0.344, + "args": { + "External id": 512982,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157532.975, "dur": 0.359, + "args": { + "External id": 512983,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157534.484, "dur": 0.339, + "args": { + "External id": 512984,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157535.634, "dur": 0.364, + "args": { + "External id": 512985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157536.776, "dur": 0.337, + "args": { + "External id": 512986,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157537.922, "dur": 0.221, + "args": { + "External id": 512987,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157538.966, "dur": 0.215, + "args": { + "External id": 512988,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157539.941, "dur": 0.221, + "args": { + "External id": 512989,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157542.916, "dur": 0.218, + "args": { + "External id": 512990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157543.983, "dur": 0.221, + "args": { + "External id": 512991,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157544.974, "dur": 0.216, + "args": { + "External id": 512992,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157546.003, "dur": 0.221, + "args": { + "External id": 512993,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157547.022, "dur": 0.218, + "args": { + "External id": 512994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157548.142, "dur": 0.227, + "args": { + "External id": 512995,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157549.168, "dur": 0.218, + "args": { + "External id": 512996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157550.179, "dur": 0.220, + "args": { + "External id": 512997,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157553.120, "dur": 0.222, + "args": { + "External id": 512998,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157554.327, "dur": 0.221, + "args": { + "External id": 512999,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157555.407, "dur": 0.226, + "args": { + "External id": 513000,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157556.620, "dur": 0.220, + "args": { + "External id": 513001,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157557.617, "dur": 0.217, + "args": { + "External id": 513002,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157558.819, "dur": 0.221, + "args": { + "External id": 513003,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157559.832, "dur": 0.216, + "args": { + "External id": 513004,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157560.807, "dur": 0.221, + "args": { + "External id": 513005,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157564.124, "dur": 0.224, + "args": { + "External id": 513006,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157565.127, "dur": 0.221, + "args": { + "External id": 513007,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157566.128, "dur": 0.216, + "args": { + "External id": 513008,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157567.367, "dur": 0.220, + "args": { + "External id": 513009,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157568.450, "dur": 0.334, + "args": { + "External id": 513010,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157569.670, "dur": 0.218, + "args": { + "External id": 513011,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157570.747, "dur": 0.333, + "args": { + "External id": 513012,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157571.968, "dur": 0.211, + "args": { + "External id": 513013,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157575.128, "dur": 0.412, + "args": { + "External id": 513014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157576.349, "dur": 0.344, + "args": { + "External id": 513015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157577.466, "dur": 0.349, + "args": { + "External id": 513016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157578.607, "dur": 0.380, + "args": { + "External id": 513017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157579.798, "dur": 0.335, + "args": { + "External id": 513018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157580.957, "dur": 0.223, + "args": { + "External id": 513019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157582.415, "dur": 0.370, + "args": { + "External id": 513020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157584.808, "dur": 0.243, + "args": { + "External id": 513021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157598.957, "dur": 0.854, + "args": { + "External id": 513022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147157602.716, "dur": 0.207, + "args": { + "External id": 513023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338711, "tid": 2338711, + "ts": 6274147157657.973, "dur": 1545.722, + "args": { + "External id": 513024,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338711, "tid": 2338711, + "ts": 6274147158048.298, "dur": 1062.134, + "args": { + "External id": 519169,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158083.580, "dur": 10.746, + "args": { + "External id": 519170,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158089.793, "dur": 3.818, + "args": { + "External id": 519171,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158094.982, "dur": 1.349, + "args": { + "External id": 519172,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158095.687, "dur": 0.554, + "args": { + "External id": 519173,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158096.702, "dur": 4.961, + "args": { + "External id": 519174,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158098.700, "dur": 2.758, + "args": { + "External id": 519175,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158101.931, "dur": 1.265, + "args": { + "External id": 519176,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158102.318, "dur": 0.791, + "args": { + "External id": 519177,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158103.602, "dur": 2.804, + "args": { + "External id": 519178,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158105.450, "dur": 0.890, + "args": { + "External id": 519179,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158106.661, "dur": 2.975, + "args": { + "External id": 519180,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158109.025, "dur": 0.548, + "args": { + "External id": 519181,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158112.063, "dur": 1.095, + "args": { + "External id": 519182,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158112.421, "dur": 0.664, + "args": { + "External id": 519183,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158113.420, "dur": 2.941, + "args": { + "External id": 519184,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158115.686, "dur": 0.611, + "args": { + "External id": 519185,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158116.593, "dur": 3.254, + "args": { + "External id": 519186,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158118.869, "dur": 0.719, + "args": { + "External id": 519187,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158120.074, "dur": 1.104, + "args": { + "External id": 519188,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158120.431, "dur": 0.681, + "args": { + "External id": 519189,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158121.463, "dur": 4.836, + "args": { + "External id": 519190,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158123.934, "dur": 2.266, + "args": { + "External id": 519191,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158126.559, "dur": 1.132, + "args": { + "External id": 519192,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158126.918, "dur": 0.527, + "args": { + "External id": 519193,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158127.944, "dur": 2.648, + "args": { + "External id": 519194,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158129.469, "dur": 0.878, + "args": { + "External id": 519195,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158130.828, "dur": 2.939, + "args": { + "External id": 519196,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158133.038, "dur": 0.656, + "args": { + "External id": 519197,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158136.216, "dur": 1.039, + "args": { + "External id": 519198,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158136.608, "dur": 0.577, + "args": { + "External id": 519199,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158137.484, "dur": 2.621, + "args": { + "External id": 519200,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158139.398, "dur": 0.638, + "args": { + "External id": 519201,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158140.332, "dur": 2.645, + "args": { + "External id": 519202,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158142.405, "dur": 0.506, + "args": { + "External id": 519203,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158143.224, "dur": 0.934, + "args": { + "External id": 519204,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158143.591, "dur": 0.498, + "args": { + "External id": 519205,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158144.385, "dur": 3.964, + "args": { + "External id": 519206,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158146.181, "dur": 2.097, + "args": { + "External id": 519207,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158148.598, "dur": 1.070, + "args": { + "External id": 519208,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158148.960, "dur": 0.638, + "args": { + "External id": 519209,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158149.895, "dur": 2.118, + "args": { + "External id": 519210,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158151.182, "dur": 0.764, + "args": { + "External id": 519211,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158152.238, "dur": 2.629, + "args": { + "External id": 519212,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158154.051, "dur": 0.745, + "args": { + "External id": 519213,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158157.268, "dur": 1.068, + "args": { + "External id": 519214,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158157.638, "dur": 0.627, + "args": { + "External id": 519215,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158158.573, "dur": 2.164, + "args": { + "External id": 519216,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158160.012, "dur": 0.660, + "args": { + "External id": 519217,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158160.999, "dur": 2.646, + "args": { + "External id": 519218,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158163.049, "dur": 0.524, + "args": { + "External id": 519219,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158163.878, "dur": 1.012, + "args": { + "External id": 519220,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158164.246, "dur": 0.563, + "args": { + "External id": 519221,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158165.114, "dur": 3.788, + "args": { + "External id": 519222,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158167.312, "dur": 1.509, + "args": { + "External id": 519223,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158169.153, "dur": 1.017, + "args": { + "External id": 519224,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158169.512, "dur": 0.588, + "args": { + "External id": 519225,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158170.582, "dur": 3.054, + "args": { + "External id": 519226,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158172.663, "dur": 0.907, + "args": { + "External id": 519227,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158173.862, "dur": 2.998, + "args": { + "External id": 519228,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158176.127, "dur": 0.660, + "args": { + "External id": 519229,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158179.125, "dur": 1.140, + "args": { + "External id": 519230,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158179.523, "dur": 0.676, + "args": { + "External id": 519231,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158180.495, "dur": 2.284, + "args": { + "External id": 519232,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158182.019, "dur": 0.695, + "args": { + "External id": 519233,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158183.006, "dur": 2.881, + "args": { + "External id": 519234,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158185.091, "dur": 0.730, + "args": { + "External id": 519235,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158186.122, "dur": 1.014, + "args": { + "External id": 519236,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158186.481, "dur": 0.591, + "args": { + "External id": 519237,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158187.366, "dur": 4.366, + "args": { + "External id": 519238,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158189.812, "dur": 1.849, + "args": { + "External id": 519239,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158191.985, "dur": 1.059, + "args": { + "External id": 519240,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158192.352, "dur": 0.626, + "args": { + "External id": 519241,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158193.335, "dur": 2.325, + "args": { + "External id": 519242,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158194.885, "dur": 0.699, + "args": { + "External id": 519243,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158195.886, "dur": 2.627, + "args": { + "External id": 519244,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158197.755, "dur": 0.690, + "args": { + "External id": 519245,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158200.704, "dur": 1.070, + "args": { + "External id": 519246,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158201.072, "dur": 0.630, + "args": { + "External id": 519247,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158202.026, "dur": 2.372, + "args": { + "External id": 519248,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158203.642, "dur": 0.691, + "args": { + "External id": 519249,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158204.634, "dur": 3.040, + "args": { + "External id": 519250,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158207.010, "dur": 0.599, + "args": { + "External id": 519251,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158207.902, "dur": 1.217, + "args": { + "External id": 519252,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158208.260, "dur": 0.791, + "args": { + "External id": 519253,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158209.363, "dur": 4.412, + "args": { + "External id": 519254,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158211.198, "dur": 2.313, + "args": { + "External id": 519255,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158214.025, "dur": 1.135, + "args": { + "External id": 519256,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158214.380, "dur": 0.516, + "args": { + "External id": 519257,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158215.456, "dur": 2.751, + "args": { + "External id": 519258,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158217.491, "dur": 0.650, + "args": { + "External id": 519259,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158218.433, "dur": 2.836, + "args": { + "External id": 519260,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158220.564, "dur": 0.634, + "args": { + "External id": 519261,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158223.331, "dur": 1.088, + "args": { + "External id": 519262,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158223.699, "dur": 0.650, + "args": { + "External id": 519263,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158224.649, "dur": 2.153, + "args": { + "External id": 519264,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158225.975, "dur": 0.760, + "args": { + "External id": 519265,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158227.036, "dur": 2.920, + "args": { + "External id": 519266,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158229.268, "dur": 0.617, + "args": { + "External id": 519267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158230.196, "dur": 1.155, + "args": { + "External id": 519268,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158230.562, "dur": 0.719, + "args": { + "External id": 519269,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158231.586, "dur": 3.506, + "args": { + "External id": 519270,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158233.306, "dur": 1.712, + "args": { + "External id": 519271,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158235.355, "dur": 1.273, + "args": { + "External id": 519272,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158235.752, "dur": 0.806, + "args": { + "External id": 519273,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158236.904, "dur": 3.132, + "args": { + "External id": 519274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158238.820, "dur": 1.150, + "args": { + "External id": 519275,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158240.262, "dur": 3.296, + "args": { + "External id": 519276,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158242.792, "dur": 0.701, + "args": { + "External id": 519277,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158245.802, "dur": 1.131, + "args": { + "External id": 519278,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158246.170, "dur": 0.696, + "args": { + "External id": 519279,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158247.160, "dur": 4.595, + "args": { + "External id": 519280,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158249.454, "dur": 2.037, + "args": { + "External id": 519281,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158252.008, "dur": 1.190, + "args": { + "External id": 519282,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158252.393, "dur": 0.740, + "args": { + "External id": 519283,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158253.640, "dur": 1.100, + "args": { + "External id": 519284,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158254.009, "dur": 0.668, + "args": { + "External id": 519285,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158254.982, "dur": 4.644, + "args": { + "External id": 519286,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158258.766, "dur": 0.789, + "args": { + "External id": 519287,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158259.877, "dur": 1.177, + "args": { + "External id": 519288,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158260.243, "dur": 0.528, + "args": { + "External id": 519289,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158261.278, "dur": 1.759, + "args": { + "External id": 519290,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158262.527, "dur": 0.443, + "args": { + "External id": 519291,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158263.271, "dur": 2.589, + "args": { + "External id": 519292,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158265.091, "dur": 0.703, + "args": { + "External id": 519293,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158267.890, "dur": 1.212, + "args": { + "External id": 519294,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158268.259, "dur": 0.778, + "args": { + "External id": 519295,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158269.359, "dur": 3.550, + "args": { + "External id": 519296,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158271.278, "dur": 1.560, + "args": { + "External id": 519297,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158273.143, "dur": 1.219, + "args": { + "External id": 519298,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158273.501, "dur": 0.798, + "args": { + "External id": 519299,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158274.611, "dur": 1.428, + "args": { + "External id": 519300,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158274.979, "dur": 0.894, + "args": { + "External id": 519301,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158276.271, "dur": 4.468, + "args": { + "External id": 519302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158279.990, "dur": 0.681, + "args": { + "External id": 519303,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158280.981, "dur": 1.020, + "args": { + "External id": 519304,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158281.356, "dur": 0.576, + "args": { + "External id": 519305,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158282.244, "dur": 2.774, + "args": { + "External id": 519306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158284.176, "dur": 0.774, + "args": { + "External id": 519307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158285.267, "dur": 2.822, + "args": { + "External id": 519308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158287.466, "dur": 0.556, + "args": { + "External id": 519309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158290.456, "dur": 1.104, + "args": { + "External id": 519310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158290.839, "dur": 0.654, + "args": { + "External id": 519311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158291.825, "dur": 4.110, + "args": { + "External id": 519312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158293.676, "dur": 2.186, + "args": { + "External id": 519313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158296.182, "dur": 1.004, + "args": { + "External id": 519314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158296.547, "dur": 0.571, + "args": { + "External id": 519315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158297.414, "dur": 1.334, + "args": { + "External id": 519316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158297.771, "dur": 0.908, + "args": { + "External id": 519317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158298.979, "dur": 4.247, + "args": { + "External id": 519318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158302.486, "dur": 0.670, + "args": { + "External id": 519319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158303.466, "dur": 1.150, + "args": { + "External id": 519320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158303.822, "dur": 0.509, + "args": { + "External id": 519321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158304.853, "dur": 2.834, + "args": { + "External id": 519322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158306.677, "dur": 0.943, + "args": { + "External id": 519323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158307.963, "dur": 3.295, + "args": { + "External id": 519324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158310.435, "dur": 0.751, + "args": { + "External id": 519325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158313.338, "dur": 0.903, + "args": { + "External id": 519326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158313.710, "dur": 0.461, + "args": { + "External id": 519327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158314.470, "dur": 3.863, + "args": { + "External id": 519328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158316.185, "dur": 2.078, + "args": { + "External id": 519329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158318.580, "dur": 1.118, + "args": { + "External id": 519330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158318.946, "dur": 0.599, + "args": { + "External id": 519331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158320.178, "dur": 1.393, + "args": { + "External id": 519332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158320.546, "dur": 0.959, + "args": { + "External id": 519333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158322.035, "dur": 3.723, + "args": { + "External id": 519334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158325.139, "dur": 0.548, + "args": { + "External id": 519335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158326.002, "dur": 1.108, + "args": { + "External id": 519336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158326.358, "dur": 0.682, + "args": { + "External id": 519337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158327.377, "dur": 2.843, + "args": { + "External id": 519338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158329.442, "dur": 0.712, + "args": { + "External id": 519339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158330.496, "dur": 3.006, + "args": { + "External id": 519340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158332.643, "dur": 0.793, + "args": { + "External id": 519341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158335.669, "dur": 0.935, + "args": { + "External id": 519342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158336.036, "dur": 0.500, + "args": { + "External id": 519343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158336.829, "dur": 3.750, + "args": { + "External id": 519344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158338.793, "dur": 1.714, + "args": { + "External id": 519345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158340.807, "dur": 1.092, + "args": { + "External id": 519346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158341.161, "dur": 0.565, + "args": { + "External id": 519347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158342.130, "dur": 1.324, + "args": { + "External id": 519348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158342.513, "dur": 0.870, + "args": { + "External id": 519349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158343.678, "dur": 3.114, + "args": { + "External id": 519350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158346.073, "dur": 0.647, + "args": { + "External id": 519351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158347.064, "dur": 1.739, + "args": { + "External id": 519352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158347.958, "dur": 0.778, + "args": { + "External id": 519353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158349.085, "dur": 1.888, + "args": { + "External id": 519354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158350.286, "dur": 0.616, + "args": { + "External id": 519355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158351.233, "dur": 2.555, + "args": { + "External id": 519356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158353.200, "dur": 0.520, + "args": { + "External id": 519357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158355.980, "dur": 1.777, + "args": { + "External id": 519358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158357.100, "dur": 0.589, + "args": { + "External id": 519359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158358.007, "dur": 2.861, + "args": { + "External id": 519360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158359.043, "dur": 1.750, + "args": { + "External id": 519361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158361.122, "dur": 1.667, + "args": { + "External id": 519362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158361.896, "dur": 0.608, + "args": { + "External id": 519363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158363.051, "dur": 2.096, + "args": { + "External id": 519364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158364.160, "dur": 0.727, + "args": { + "External id": 519365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158365.427, "dur": 3.113, + "args": { + "External id": 519366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158367.885, "dur": 0.587, + "args": { + "External id": 519367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158368.840, "dur": 2.096, + "args": { + "External id": 519368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158370.296, "dur": 0.573, + "args": { + "External id": 519369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158371.189, "dur": 1.420, + "args": { + "External id": 519370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158371.896, "dur": 0.643, + "args": { + "External id": 519371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158372.886, "dur": 2.841, + "args": { + "External id": 519372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158375.018, "dur": 0.637, + "args": { + "External id": 519373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158378.187, "dur": 1.669, + "args": { + "External id": 519374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158379.127, "dur": 0.660, + "args": { + "External id": 519375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158380.194, "dur": 3.870, + "args": { + "External id": 519376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158381.576, "dur": 2.211, + "args": { + "External id": 519377,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158384.320, "dur": 1.460, + "args": { + "External id": 519378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158385.009, "dur": 0.616, + "args": { + "External id": 519379,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158386.136, "dur": 1.765, + "args": { + "External id": 519380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158387.112, "dur": 0.720, + "args": { + "External id": 519381,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158388.152, "dur": 3.288, + "args": { + "External id": 519382,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158390.931, "dur": 0.439, + "args": { + "External id": 519383,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158391.727, "dur": 2.019, + "args": { + "External id": 519384,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158392.820, "dur": 0.621, + "args": { + "External id": 519385,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158394.014, "dur": 1.622, + "args": { + "External id": 519386,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158394.873, "dur": 0.694, + "args": { + "External id": 519387,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158395.894, "dur": 3.737, + "args": { + "External id": 519388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158398.704, "dur": 0.665, + "args": { + "External id": 519389,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158401.627, "dur": 1.838, + "args": { + "External id": 519390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158402.678, "dur": 0.716, + "args": { + "External id": 519391,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158403.720, "dur": 2.667, + "args": { + "External id": 519392,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158404.522, "dur": 1.792, + "args": { + "External id": 519393,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158406.699, "dur": 1.302, + "args": { + "External id": 519394,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158407.492, "dur": 0.439, + "args": { + "External id": 519395,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158408.254, "dur": 1.869, + "args": { + "External id": 519396,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158409.178, "dur": 0.671, + "args": { + "External id": 519397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158410.376, "dur": 3.816, + "args": { + "External id": 519398,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158413.483, "dur": 0.637, + "args": { + "External id": 519399,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158414.497, "dur": 1.814, + "args": { + "External id": 519400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158415.607, "dur": 0.637, + "args": { + "External id": 519401,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158416.650, "dur": 2.067, + "args": { + "External id": 519402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158417.847, "dur": 0.802, + "args": { + "External id": 519403,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158419.001, "dur": 3.389, + "args": { + "External id": 519404,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158421.713, "dur": 0.607, + "args": { + "External id": 519405,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158424.479, "dur": 1.697, + "args": { + "External id": 519406,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158425.159, "dur": 0.948, + "args": { + "External id": 519407,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158426.429, "dur": 2.896, + "args": { + "External id": 519408,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158427.680, "dur": 1.570, + "args": { + "External id": 519409,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158429.664, "dur": 1.776, + "args": { + "External id": 519410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158430.856, "dur": 0.511, + "args": { + "External id": 519411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158431.702, "dur": 1.832, + "args": { + "External id": 519412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158432.821, "dur": 0.644, + "args": { + "External id": 519413,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158433.810, "dur": 3.675, + "args": { + "External id": 519414,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158436.681, "dur": 0.735, + "args": { + "External id": 519415,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158437.791, "dur": 2.061, + "args": { + "External id": 519416,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158439.101, "dur": 0.681, + "args": { + "External id": 519417,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158440.181, "dur": 1.693, + "args": { + "External id": 519418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158440.941, "dur": 0.862, + "args": { + "External id": 519419,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158442.137, "dur": 3.083, + "args": { + "External id": 519420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158444.550, "dur": 0.600, + "args": { + "External id": 519421,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158447.168, "dur": 1.691, + "args": { + "External id": 519422,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158448.043, "dur": 0.748, + "args": { + "External id": 519423,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158449.110, "dur": 3.275, + "args": { + "External id": 519424,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158450.318, "dur": 1.994, + "args": { + "External id": 519425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158452.638, "dur": 1.280, + "args": { + "External id": 519426,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158453.385, "dur": 0.462, + "args": { + "External id": 519427,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158454.174, "dur": 1.593, + "args": { + "External id": 519428,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158455.042, "dur": 0.659, + "args": { + "External id": 519429,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158456.018, "dur": 3.464, + "args": { + "External id": 519430,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158458.736, "dur": 0.675, + "args": { + "External id": 519431,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158459.796, "dur": 2.082, + "args": { + "External id": 519432,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158461.188, "dur": 0.621, + "args": { + "External id": 519433,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158462.162, "dur": 1.789, + "args": { + "External id": 519434,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158463.089, "dur": 0.792, + "args": { + "External id": 519435,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158464.202, "dur": 3.226, + "args": { + "External id": 519436,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158466.847, "dur": 0.511, + "args": { + "External id": 519437,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158469.382, "dur": 1.891, + "args": { + "External id": 519438,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158470.387, "dur": 0.818, + "args": { + "External id": 519439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158471.531, "dur": 2.947, + "args": { + "External id": 519440,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158472.645, "dur": 1.755, + "args": { + "External id": 519441,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158474.793, "dur": 1.927, + "args": { + "External id": 519442,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158475.932, "dur": 0.620, + "args": { + "External id": 519443,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158477.138, "dur": 1.802, + "args": { + "External id": 519444,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158478.114, "dur": 0.755, + "args": { + "External id": 519445,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158479.203, "dur": 3.458, + "args": { + "External id": 519446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158481.892, "dur": 0.698, + "args": { + "External id": 519447,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158483.002, "dur": 1.975, + "args": { + "External id": 519448,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158484.262, "dur": 0.645, + "args": { + "External id": 519449,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158485.253, "dur": 1.424, + "args": { + "External id": 519450,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158485.907, "dur": 0.704, + "args": { + "External id": 519451,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158486.952, "dur": 3.520, + "args": { + "External id": 519452,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158489.714, "dur": 0.691, + "args": { + "External id": 519453,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158493.083, "dur": 1.928, + "args": { + "External id": 519454,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158494.230, "dur": 0.541, + "args": { + "External id": 519455,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158495.289, "dur": 3.417, + "args": { + "External id": 519456,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158496.496, "dur": 2.139, + "args": { + "External id": 519457,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158499.001, "dur": 1.815, + "args": { + "External id": 519458,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158500.169, "dur": 0.575, + "args": { + "External id": 519459,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158501.075, "dur": 1.948, + "args": { + "External id": 519460,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158502.015, "dur": 0.942, + "args": { + "External id": 519461,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158503.321, "dur": 3.272, + "args": { + "External id": 519462,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158505.908, "dur": 0.613, + "args": { + "External id": 519463,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158506.866, "dur": 2.375, + "args": { + "External id": 519464,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158508.139, "dur": 0.828, + "args": { + "External id": 519465,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158509.530, "dur": 1.575, + "args": { + "External id": 519466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158510.384, "dur": 0.654, + "args": { + "External id": 519467,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158511.387, "dur": 3.943, + "args": { + "External id": 519468,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158514.435, "dur": 0.824, + "args": { + "External id": 519469,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158517.701, "dur": 1.757, + "args": { + "External id": 519470,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158518.519, "dur": 0.869, + "args": { + "External id": 519471,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158519.733, "dur": 3.523, + "args": { + "External id": 519472,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158521.214, "dur": 1.954, + "args": { + "External id": 519473,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158523.524, "dur": 1.445, + "args": { + "External id": 519474,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158524.212, "dur": 0.692, + "args": { + "External id": 519475,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158525.341, "dur": 1.908, + "args": { + "External id": 519476,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158526.309, "dur": 0.874, + "args": { + "External id": 519477,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158527.521, "dur": 2.710, + "args": { + "External id": 519478,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158529.610, "dur": 0.551, + "args": { + "External id": 519479,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158530.543, "dur": 2.228, + "args": { + "External id": 519480,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158532.082, "dur": 0.620, + "args": { + "External id": 519481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158533.030, "dur": 1.925, + "args": { + "External id": 519482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158534.148, "dur": 0.737, + "args": { + "External id": 519483,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158535.213, "dur": 3.297, + "args": { + "External id": 519484,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158537.898, "dur": 0.544, + "args": { + "External id": 519485,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158540.376, "dur": 1.231, + "args": { + "External id": 519486,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158541.045, "dur": 0.494, + "args": { + "External id": 519487,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158541.874, "dur": 3.977, + "args": { + "External id": 519488,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158543.653, "dur": 2.032, + "args": { + "External id": 519489,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158546.119, "dur": 1.436, + "args": { + "External id": 519490,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158546.784, "dur": 0.701, + "args": { + "External id": 519491,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158547.814, "dur": 2.787, + "args": { + "External id": 519492,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158548.727, "dur": 1.807, + "args": { + "External id": 519493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158550.896, "dur": 3.280, + "args": { + "External id": 519494,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158553.349, "dur": 0.758, + "args": { + "External id": 519495,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158554.456, "dur": 1.499, + "args": { + "External id": 519496,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158555.361, "dur": 0.526, + "args": { + "External id": 519497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158556.246, "dur": 1.458, + "args": { + "External id": 519498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158557.058, "dur": 0.576, + "args": { + "External id": 519499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158557.958, "dur": 3.684, + "args": { + "External id": 519500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158561.096, "dur": 0.474, + "args": { + "External id": 519501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158564.002, "dur": 1.669, + "args": { + "External id": 519502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158564.962, "dur": 0.640, + "args": { + "External id": 519503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158565.940, "dur": 3.122, + "args": { + "External id": 519504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158567.058, "dur": 1.931, + "args": { + "External id": 519505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158569.358, "dur": 1.670, + "args": { + "External id": 519506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158570.434, "dur": 0.523, + "args": { + "External id": 519507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158571.296, "dur": 2.046, + "args": { + "External id": 519508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158572.488, "dur": 0.786, + "args": { + "External id": 519509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158573.597, "dur": 3.721, + "args": { + "External id": 519510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158576.338, "dur": 0.917, + "args": { + "External id": 519511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158577.621, "dur": 1.798, + "args": { + "External id": 519512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158578.681, "dur": 0.660, + "args": { + "External id": 519513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158579.683, "dur": 1.771, + "args": { + "External id": 519514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158580.674, "dur": 0.714, + "args": { + "External id": 519515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158581.831, "dur": 3.731, + "args": { + "External id": 519516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158584.958, "dur": 0.534, + "args": { + "External id": 519517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158599.496, "dur": 3.213, + "args": { + "External id": 519518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158601.510, "dur": 0.880, + "args": { + "External id": 519519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158603.137, "dur": 2.853, + "args": { + "External id": 519520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158604.146, "dur": 1.771, + "args": { + "External id": 519521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158606.261, "dur": 1.424, + "args": { + "External id": 519522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158607.034, "dur": 0.581, + "args": { + "External id": 519523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158608.015, "dur": 2.444, + "args": { + "External id": 519524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158609.512, "dur": 0.877, + "args": { + "External id": 519525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158610.774, "dur": 3.456, + "args": { + "External id": 519526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158613.599, "dur": 0.542, + "args": { + "External id": 519527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158614.565, "dur": 2.024, + "args": { + "External id": 519528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158615.984, "dur": 0.536, + "args": { + "External id": 519529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158616.891, "dur": 1.786, + "args": { + "External id": 519530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158617.998, "dur": 0.610, + "args": { + "External id": 519531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158619.171, "dur": 3.678, + "args": { + "External id": 519532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158622.307, "dur": 0.473, + "args": { + "External id": 519533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158625.461, "dur": 1.543, + "args": { + "External id": 519534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158626.287, "dur": 0.645, + "args": { + "External id": 519535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158627.293, "dur": 4.581, + "args": { + "External id": 519536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158629.842, "dur": 1.870, + "args": { + "External id": 519537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158632.132, "dur": 1.708, + "args": { + "External id": 519538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158632.929, "dur": 0.840, + "args": { + "External id": 519539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158634.096, "dur": 2.085, + "args": { + "External id": 519540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158635.408, "dur": 0.705, + "args": { + "External id": 519541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158636.460, "dur": 3.217, + "args": { + "External id": 519542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158639.103, "dur": 0.505, + "args": { + "External id": 519543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158639.961, "dur": 2.286, + "args": { + "External id": 519544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158641.236, "dur": 0.944, + "args": { + "External id": 519545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158642.541, "dur": 1.350, + "args": { + "External id": 519546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158643.152, "dur": 0.673, + "args": { + "External id": 519547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158644.161, "dur": 3.413, + "args": { + "External id": 519548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158646.749, "dur": 0.753, + "args": { + "External id": 519549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158649.886, "dur": 1.904, + "args": { + "External id": 519550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158651.157, "dur": 0.563, + "args": { + "External id": 519551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158652.214, "dur": 3.255, + "args": { + "External id": 519552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158653.749, "dur": 1.647, + "args": { + "External id": 519553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158655.736, "dur": 1.731, + "args": { + "External id": 519554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158656.636, "dur": 0.761, + "args": { + "External id": 519555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158658.014, "dur": 1.672, + "args": { + "External id": 519556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158658.942, "dur": 0.677, + "args": { + "External id": 519557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158659.963, "dur": 4.085, + "args": { + "External id": 519558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158663.368, "dur": 0.609, + "args": { + "External id": 519559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158664.345, "dur": 2.053, + "args": { + "External id": 519560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158665.552, "dur": 0.779, + "args": { + "External id": 519561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158666.861, "dur": 1.434, + "args": { + "External id": 519562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158667.429, "dur": 0.800, + "args": { + "External id": 519563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158668.574, "dur": 3.695, + "args": { + "External id": 519564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158671.445, "dur": 0.753, + "args": { + "External id": 519565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158674.896, "dur": 1.442, + "args": { + "External id": 519566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158675.640, "dur": 0.629, + "args": { + "External id": 519567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158676.589, "dur": 3.525, + "args": { + "External id": 519568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158678.218, "dur": 1.823, + "args": { + "External id": 519569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158680.416, "dur": 1.575, + "args": { + "External id": 519570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158681.183, "dur": 0.740, + "args": { + "External id": 519571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158682.254, "dur": 2.114, + "args": { + "External id": 519572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158683.691, "dur": 0.607, + "args": { + "External id": 519573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158684.618, "dur": 3.019, + "args": { + "External id": 519574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158686.817, "dur": 0.754, + "args": { + "External id": 519575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158687.956, "dur": 1.893, + "args": { + "External id": 519576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158689.192, "dur": 0.590, + "args": { + "External id": 519577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158690.103, "dur": 1.826, + "args": { + "External id": 519578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158691.154, "dur": 0.708, + "args": { + "External id": 519579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158692.182, "dur": 3.865, + "args": { + "External id": 519580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158695.168, "dur": 0.810, + "args": { + "External id": 519581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158698.096, "dur": 1.652, + "args": { + "External id": 519582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158699.111, "dur": 0.570, + "args": { + "External id": 519583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158700.021, "dur": 3.422, + "args": { + "External id": 519584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158701.454, "dur": 1.915, + "args": { + "External id": 519585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158703.696, "dur": 1.587, + "args": { + "External id": 519586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158704.759, "dur": 0.460, + "args": { + "External id": 519587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158705.534, "dur": 1.717, + "args": { + "External id": 519588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158706.435, "dur": 0.748, + "args": { + "External id": 519589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158707.528, "dur": 3.424, + "args": { + "External id": 519590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158710.347, "dur": 0.536, + "args": { + "External id": 519591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158711.228, "dur": 1.762, + "args": { + "External id": 519592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158712.465, "dur": 0.456, + "args": { + "External id": 519593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158713.251, "dur": 1.684, + "args": { + "External id": 519594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158714.047, "dur": 0.823, + "args": { + "External id": 519595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158715.254, "dur": 4.080, + "args": { + "External id": 519596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158718.720, "dur": 0.544, + "args": { + "External id": 519597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158721.822, "dur": 1.636, + "args": { + "External id": 519598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158722.794, "dur": 0.596, + "args": { + "External id": 519599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158723.711, "dur": 3.238, + "args": { + "External id": 519600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158725.001, "dur": 1.873, + "args": { + "External id": 519601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158727.216, "dur": 1.413, + "args": { + "External id": 519602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158727.927, "dur": 0.636, + "args": { + "External id": 519603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158728.898, "dur": 2.165, + "args": { + "External id": 519604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158730.097, "dur": 0.896, + "args": { + "External id": 519605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158731.338, "dur": 3.632, + "args": { + "External id": 519606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158734.206, "dur": 0.695, + "args": { + "External id": 519607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158735.308, "dur": 1.628, + "args": { + "External id": 519608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158736.430, "dur": 0.436, + "args": { + "External id": 519609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158737.226, "dur": 1.614, + "args": { + "External id": 519610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158738.091, "dur": 0.684, + "args": { + "External id": 519611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158739.105, "dur": 3.962, + "args": { + "External id": 519612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158742.203, "dur": 0.798, + "args": { + "External id": 519613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158745.659, "dur": 1.514, + "args": { + "External id": 519614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158746.530, "dur": 0.575, + "args": { + "External id": 519615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158747.545, "dur": 2.778, + "args": { + "External id": 519616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158748.533, "dur": 1.717, + "args": { + "External id": 519617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158750.585, "dur": 1.917, + "args": { + "External id": 519618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158751.445, "dur": 0.992, + "args": { + "External id": 519619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158753.100, "dur": 1.921, + "args": { + "External id": 519620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158754.290, "dur": 0.661, + "args": { + "External id": 519621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158755.294, "dur": 3.047, + "args": { + "External id": 519622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158757.850, "dur": 0.422, + "args": { + "External id": 519623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158758.616, "dur": 1.921, + "args": { + "External id": 519624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158760.010, "dur": 0.458, + "args": { + "External id": 519625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158761.068, "dur": 1.506, + "args": { + "External id": 519626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158761.859, "dur": 0.649, + "args": { + "External id": 519627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158762.937, "dur": 3.628, + "args": { + "External id": 519628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158765.855, "dur": 0.639, + "args": { + "External id": 519629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158768.873, "dur": 5.316, + "args": { + "External id": 519630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158773.671, "dur": 0.376, + "args": { + "External id": 519631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158774.471, "dur": 2.967, + "args": { + "External id": 519632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158775.615, "dur": 1.750, + "args": { + "External id": 519633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158777.707, "dur": 1.490, + "args": { + "External id": 519634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158778.597, "dur": 0.533, + "args": { + "External id": 519635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158779.734, "dur": 2.076, + "args": { + "External id": 519636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158781.065, "dur": 0.673, + "args": { + "External id": 519637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158782.062, "dur": 2.736, + "args": { + "External id": 519638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158784.103, "dur": 0.631, + "args": { + "External id": 519639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158785.154, "dur": 1.795, + "args": { + "External id": 519640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158786.447, "dur": 0.432, + "args": { + "External id": 519641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158787.224, "dur": 1.227, + "args": { + "External id": 519642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158787.870, "dur": 0.516, + "args": { + "External id": 519643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158788.757, "dur": 3.571, + "args": { + "External id": 519644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158791.750, "dur": 0.507, + "args": { + "External id": 519645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158794.648, "dur": 1.383, + "args": { + "External id": 519646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158795.346, "dur": 0.617, + "args": { + "External id": 519647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158796.359, "dur": 3.299, + "args": { + "External id": 519648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158797.759, "dur": 1.825, + "args": { + "External id": 519649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158799.964, "dur": 1.667, + "args": { + "External id": 519650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158801.024, "dur": 0.540, + "args": { + "External id": 519651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158802.384, "dur": 2.001, + "args": { + "External id": 519652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158803.582, "dur": 0.736, + "args": { + "External id": 519653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158804.636, "dur": 2.923, + "args": { + "External id": 519654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158806.981, "dur": 0.509, + "args": { + "External id": 519655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158807.850, "dur": 1.780, + "args": { + "External id": 519656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158809.014, "dur": 0.550, + "args": { + "External id": 519657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158810.001, "dur": 1.854, + "args": { + "External id": 519658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158811.095, "dur": 0.692, + "args": { + "External id": 519659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158812.291, "dur": 3.613, + "args": { + "External id": 519660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158815.297, "dur": 0.536, + "args": { + "External id": 519661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158818.398, "dur": 1.584, + "args": { + "External id": 519662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158819.387, "dur": 0.527, + "args": { + "External id": 519663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158820.267, "dur": 3.395, + "args": { + "External id": 519664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158821.523, "dur": 2.064, + "args": { + "External id": 519665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158823.916, "dur": 1.336, + "args": { + "External id": 519666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158824.582, "dur": 0.605, + "args": { + "External id": 519667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158825.519, "dur": 2.351, + "args": { + "External id": 519668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158826.920, "dur": 0.883, + "args": { + "External id": 519669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158828.331, "dur": 3.457, + "args": { + "External id": 519670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158831.065, "dur": 0.653, + "args": { + "External id": 519671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158832.074, "dur": 1.527, + "args": { + "External id": 519672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158832.941, "dur": 0.596, + "args": { + "External id": 519673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158833.874, "dur": 1.789, + "args": { + "External id": 519674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158834.777, "dur": 0.820, + "args": { + "External id": 519675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158835.913, "dur": 3.716, + "args": { + "External id": 519676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158838.635, "dur": 0.926, + "args": { + "External id": 519677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158842.103, "dur": 1.727, + "args": { + "External id": 519678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158842.978, "dur": 0.787, + "args": { + "External id": 519679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158844.103, "dur": 27.604, + "args": { + "External id": 519680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158869.283, "dur": 2.339, + "args": { + "External id": 519681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158872.011, "dur": 1.453, + "args": { + "External id": 519682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158872.728, "dur": 0.671, + "args": { + "External id": 519683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158873.780, "dur": 1.907, + "args": { + "External id": 519684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158874.841, "dur": 0.781, + "args": { + "External id": 519685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158875.991, "dur": 3.470, + "args": { + "External id": 519686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158878.394, "dur": 0.998, + "args": { + "External id": 519687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158879.747, "dur": 1.509, + "args": { + "External id": 519688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158880.560, "dur": 0.629, + "args": { + "External id": 519689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158881.536, "dur": 1.530, + "args": { + "External id": 519690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158882.266, "dur": 0.731, + "args": { + "External id": 519691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158883.467, "dur": 3.814, + "args": { + "External id": 519692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158886.571, "dur": 0.639, + "args": { + "External id": 519693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158890.014, "dur": 1.515, + "args": { + "External id": 519694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158890.931, "dur": 0.533, + "args": { + "External id": 519695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158891.823, "dur": 2.671, + "args": { + "External id": 519696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158892.790, "dur": 1.630, + "args": { + "External id": 519697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158894.793, "dur": 1.640, + "args": { + "External id": 519698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158895.696, "dur": 0.669, + "args": { + "External id": 519699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158896.687, "dur": 2.164, + "args": { + "External id": 519700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158898.098, "dur": 0.688, + "args": { + "External id": 519701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158899.115, "dur": 2.818, + "args": { + "External id": 519702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158901.277, "dur": 0.586, + "args": { + "External id": 519703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158902.204, "dur": 1.830, + "args": { + "External id": 519704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158903.413, "dur": 0.555, + "args": { + "External id": 519705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158904.321, "dur": 1.504, + "args": { + "External id": 519706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158905.063, "dur": 0.696, + "args": { + "External id": 519707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158906.206, "dur": 3.079, + "args": { + "External id": 519708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158908.637, "dur": 0.581, + "args": { + "External id": 519709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158911.603, "dur": 1.752, + "args": { + "External id": 519710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158912.666, "dur": 0.617, + "args": { + "External id": 519711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158913.604, "dur": 3.128, + "args": { + "External id": 519712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158914.694, "dur": 1.966, + "args": { + "External id": 519713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338711, "tid": 2338711, + "ts": 6274147158917.021, "dur": 1.377, + "args": { + "External id": 519714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147158917.784, "dur": 0.553, + "args": { + "External id": 519715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338711, "tid": 2338711, + "ts": 6274147158938.757, "dur": 159.549, + "args": { + "External id": 519716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338711, "tid": 2338711, + "ts": 6274147159220.687, "dur": 147.414, + "args": { + "External id": 519717,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338711, "tid": 2338711, + "ts": 6274147159292.264, "dur": 56.474, + "args": { + "External id": 519718,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338711, "tid": 2338711, + "ts": 6274147159313.268, "dur": 1.110, + "args": { + "External id": 519719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2338711, "tid": 2338711, + "ts": 6274147159759.381, "dur": 1060.356, + "args": { + "External id": 519720,"Sequence number": 5516474, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338711, "tid": 2338711, + "ts": 6274147159821.722, "dur": 69.359, + "args": { + "External id": 519721,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147159829.316, "dur": 1.830, + "args": { + "External id": 519722,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147159833.439, "dur": 0.617, + "args": { + "External id": 519723,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2338711, "tid": 2338711, + "ts": 6274147159960.550, "dur": 502.250, + "args": { + "External id": 519724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338711, "tid": 2338711, + "ts": 6274147159965.227, "dur": 50.392, + "args": { + "External id": 519725,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338711, "tid": 2338711, + "ts": 6274147159970.498, "dur": 11.242, + "args": { + "External id": 519726,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274147159975.281, "dur": 5.547, + "args": { + "External id": 519727,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338711, "tid": 2338711, + "ts": 6274147159983.292, "dur": 31.702, + "args": { + "External id": 519728,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2338711, "tid": 2338711, + "ts": 6274147160025.382, "dur": 432.949, + "args": { + "External id": 519729,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 21621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274147160093.714, "dur": 356.802, + "args": { + "External id": 519730,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 21622, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2338711, "tid": 2338711, + "ts": 6274147160109.201, "dur": 335.592, + "args": { + "External id": 519731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338711, "tid": 2338711, + "ts": 6274147160538.331, "dur": 243.941, + "args": { + "External id": 519732,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2338711, "tid": 2338711, + "ts": 6274147160662.299, "dur": 40.386, + "args": { + "External id": 519733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338711, "tid": 2338711, + "ts": 6274147160686.422, "dur": 6.167, + "args": { + "External id": 519734,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 21626, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338711, "tid": 2338711, + "ts": 6274147160728.798, "dur": 46.879, + "args": { + "External id": 519735,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147160732.356, "dur": 1.197, + "args": { + "External id": 519736,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147160735.072, "dur": 0.667, + "args": { + "External id": 519737,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2338711, "tid": 2338711, + "ts": 6274147160837.577, "dur": 23.438, + "args": { + "External id": 519738,"Sequence number": 5516475, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338711, "tid": 2338711, + "ts": 6274147160849.169, "dur": 8.667, + "args": { + "External id": 519739,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338711, "tid": 2338711, + "ts": 6274147160851.922, "dur": 5.745, + "args": { + "External id": 519740,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338711, "tid": 2338711, + "ts": 6274147161254.321, "dur": 37.300, + "args": { + "External id": 519741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2338711, "tid": 2338711, + "ts": 6274147161302.579, "dur": 23.494, + "args": { + "External id": 519742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2338711, "tid": 2338711, + "ts": 6274147161334.276, "dur": 22.467, + "args": { + "External id": 519743,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2338711, "tid": 2338711, + "ts": 6274147161371.161, "dur": 29.584, + "args": { + "External id": 519744,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147161375.384, "dur": 0.655, + "args": { + "External id": 519745,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338711, "tid": 2338711, + "ts": 6274147161417.741, "dur": 0.550, + "args": { + "External id": 519746,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338711, "tid": 2338711, + "ts": 6274147161546.290, "dur": 1155.240, + "args": { + "External id": 519747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338711, "tid": 2338711, + "ts": 6274147161971.682, "dur": 685.058, + "args": { + "External id": 519748,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2338711, "tid": 2338711, + "ts": 6274147162758.173, "dur": 32.443, + "args": { + "External id": 519749,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338711, "tid": 2338711, + "ts": 6274147162762.506, "dur": 27.531, + "args": { + "External id": 519750,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2338711, + "ts": 6274147162795.622, "dur": 9819.829, + "args": { + "External id": 519751,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274147162797.594, "dur": 9817.270, + "args": { + "External id": 519752,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274147162799.511, "dur": 9812.848, + "args": { + "External id": 519753,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2338711, "tid": 2338711, + "ts": 6274147172629.471, "dur": 82.780, + "args": { + "External id": 519754,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274147172633.705, "dur": 51.406, + "args": { + "External id": 519755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338711, "tid": 2338711, + "ts": 6274147172645.374, "dur": 4.814, + "args": { + "External id": 519756,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338711, "tid": 2338711, + "ts": 6274147172652.151, "dur": 32.614, + "args": { + "External id": 519757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 21649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338711, "tid": 2338711, + "ts": 6274147172661.658, "dur": 3.725, + "args": { + "External id": 519758,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 21650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338711, "tid": 2338711, + "ts": 6274147172687.055, "dur": 24.291, + "args": { + "External id": 519759,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338711, "tid": 2338711, + "ts": 6274147172715.946, "dur": 41.327, + "args": { + "External id": 519760,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338711, "tid": 2338711, + "ts": 6274147172717.373, "dur": 39.736, + "args": { + "External id": 519761,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338711, "tid": 2338711, + "ts": 6274147172718.822, "dur": 37.980, + "args": { + "External id": 519762,"Sequence number": 5516476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21654 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2338711, "tid": 2338711, + "ts": 6274147172801.673, "dur": 5773.063, + "args": { + "External id": 519763,"Record function id": 0, "Ev Idx": 21655 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2338711, "tid": 2338711, + "ts": 6274147172836.835, "dur": 5717.722, + "args": { + "External id": 519764,"Record function id": 0, "Ev Idx": 21656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2338711, "tid": 2338711, + "ts": 6274147174157.268, "dur": 237.979, + "args": { + "External id": 519765,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174178.519, "dur": 1.366, + "args": { + "External id": 519766,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174181.074, "dur": 0.315, + "args": { + "External id": 519767,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174181.787, "dur": 0.469, + "args": { + "External id": 519768,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174183.028, "dur": 0.066, + "args": { + "External id": 519769,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174183.578, "dur": 0.306, + "args": { + "External id": 519770,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174184.415, "dur": 0.055, + "args": { + "External id": 519771,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174184.882, "dur": 0.304, + "args": { + "External id": 519772,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174185.627, "dur": 0.264, + "args": { + "External id": 519773,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174186.543, "dur": 0.293, + "args": { + "External id": 519774,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174187.240, "dur": 0.335, + "args": { + "External id": 519775,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174188.030, "dur": 0.263, + "args": { + "External id": 519776,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174188.559, "dur": 0.065, + "args": { + "External id": 519777,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174188.911, "dur": 0.076, + "args": { + "External id": 519778,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174189.439, "dur": 0.068, + "args": { + "External id": 519779,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174190.191, "dur": 0.085, + "args": { + "External id": 519780,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174190.814, "dur": 0.103, + "args": { + "External id": 519781,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174191.240, "dur": 0.100, + "args": { + "External id": 519782,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174191.720, "dur": 0.098, + "args": { + "External id": 519783,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174192.062, "dur": 0.086, + "args": { + "External id": 519784,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174192.562, "dur": 0.062, + "args": { + "External id": 519785,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174192.867, "dur": 0.053, + "args": { + "External id": 519786,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174193.259, "dur": 0.062, + "args": { + "External id": 519787,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174193.564, "dur": 0.052, + "args": { + "External id": 519788,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174194.017, "dur": 0.069, + "args": { + "External id": 519789,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174194.330, "dur": 0.054, + "args": { + "External id": 519790,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174194.801, "dur": 0.070, + "args": { + "External id": 519791,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174195.114, "dur": 0.054, + "args": { + "External id": 519792,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174195.547, "dur": 0.065, + "args": { + "External id": 519793,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174195.943, "dur": 0.068, + "args": { + "External id": 519794,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174196.419, "dur": 0.062, + "args": { + "External id": 519795,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174196.723, "dur": 0.057, + "args": { + "External id": 519796,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174197.178, "dur": 0.071, + "args": { + "External id": 519797,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174197.750, "dur": 0.053, + "args": { + "External id": 519798,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174198.207, "dur": 0.070, + "args": { + "External id": 519799,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174198.706, "dur": 0.069, + "args": { + "External id": 519800,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174199.157, "dur": 0.069, + "args": { + "External id": 519801,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174199.464, "dur": 0.054, + "args": { + "External id": 519802,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174199.914, "dur": 0.065, + "args": { + "External id": 519803,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174200.221, "dur": 0.052, + "args": { + "External id": 519804,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174200.675, "dur": 0.071, + "args": { + "External id": 519805,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174200.983, "dur": 0.057, + "args": { + "External id": 519806,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174201.458, "dur": 0.061, + "args": { + "External id": 519807,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174201.761, "dur": 0.052, + "args": { + "External id": 519808,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174202.327, "dur": 0.063, + "args": { + "External id": 519809,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174202.634, "dur": 0.049, + "args": { + "External id": 519810,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174203.092, "dur": 0.065, + "args": { + "External id": 519811,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174203.397, "dur": 0.109, + "args": { + "External id": 519812,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174203.891, "dur": 0.087, + "args": { + "External id": 519813,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174204.246, "dur": 0.277, + "args": { + "External id": 519814,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174204.919, "dur": 0.081, + "args": { + "External id": 519815,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174205.241, "dur": 0.068, + "args": { + "External id": 519816,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174205.763, "dur": 0.066, + "args": { + "External id": 519817,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174206.074, "dur": 0.053, + "args": { + "External id": 519818,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174206.487, "dur": 0.080, + "args": { + "External id": 519819,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174206.815, "dur": 0.065, + "args": { + "External id": 519820,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174207.164, "dur": 0.095, + "args": { + "External id": 519821,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174207.507, "dur": 0.334, + "args": { + "External id": 519822,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174208.167, "dur": 0.066, + "args": { + "External id": 519823,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174208.480, "dur": 0.050, + "args": { + "External id": 519824,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174208.944, "dur": 0.069, + "args": { + "External id": 519825,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174209.260, "dur": 0.051, + "args": { + "External id": 519826,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174209.748, "dur": 0.068, + "args": { + "External id": 519827,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174210.055, "dur": 0.049, + "args": { + "External id": 519828,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174210.487, "dur": 0.066, + "args": { + "External id": 519829,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174210.796, "dur": 0.052, + "args": { + "External id": 519830,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174211.264, "dur": 0.068, + "args": { + "External id": 519831,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174211.573, "dur": 0.054, + "args": { + "External id": 519832,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174212.047, "dur": 0.070, + "args": { + "External id": 519833,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174212.390, "dur": 0.056, + "args": { + "External id": 519834,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174212.846, "dur": 0.077, + "args": { + "External id": 519835,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174213.360, "dur": 0.060, + "args": { + "External id": 519836,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174213.711, "dur": 0.066, + "args": { + "External id": 519837,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174214.021, "dur": 0.052, + "args": { + "External id": 519838,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174214.447, "dur": 0.066, + "args": { + "External id": 519839,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174214.964, "dur": 0.069, + "args": { + "External id": 519840,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174215.454, "dur": 0.068, + "args": { + "External id": 519841,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174215.965, "dur": 0.065, + "args": { + "External id": 519842,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174216.380, "dur": 0.066, + "args": { + "External id": 519843,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174216.688, "dur": 0.055, + "args": { + "External id": 519844,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174217.137, "dur": 0.063, + "args": { + "External id": 519845,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174217.446, "dur": 0.053, + "args": { + "External id": 519846,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174217.832, "dur": 0.066, + "args": { + "External id": 519847,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174218.145, "dur": 0.054, + "args": { + "External id": 519848,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174218.586, "dur": 0.067, + "args": { + "External id": 519849,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174218.900, "dur": 0.048, + "args": { + "External id": 519850,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174219.303, "dur": 0.091, + "args": { + "External id": 519851,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174219.642, "dur": 0.052, + "args": { + "External id": 519852,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174220.225, "dur": 0.053, + "args": { + "External id": 519853,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174220.523, "dur": 0.053, + "args": { + "External id": 519854,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174220.915, "dur": 0.067, + "args": { + "External id": 519855,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174221.221, "dur": 0.054, + "args": { + "External id": 519856,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174221.640, "dur": 0.073, + "args": { + "External id": 519857,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174221.958, "dur": 0.054, + "args": { + "External id": 519858,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174222.358, "dur": 0.069, + "args": { + "External id": 519859,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174222.670, "dur": 0.057, + "args": { + "External id": 519860,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174223.191, "dur": 0.050, + "args": { + "External id": 519861,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174223.490, "dur": 0.051, + "args": { + "External id": 519862,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174224.021, "dur": 0.054, + "args": { + "External id": 519863,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174224.317, "dur": 0.053, + "args": { + "External id": 519864,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174224.777, "dur": 0.066, + "args": { + "External id": 519865,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174225.090, "dur": 0.055, + "args": { + "External id": 519866,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174225.447, "dur": 0.067, + "args": { + "External id": 519867,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174225.754, "dur": 0.053, + "args": { + "External id": 519868,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174226.221, "dur": 0.062, + "args": { + "External id": 519869,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174226.543, "dur": 0.054, + "args": { + "External id": 519870,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174226.935, "dur": 0.070, + "args": { + "External id": 519871,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174227.262, "dur": 0.055, + "args": { + "External id": 519872,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174227.686, "dur": 0.068, + "args": { + "External id": 519873,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174228.013, "dur": 0.052, + "args": { + "External id": 519874,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174228.478, "dur": 0.063, + "args": { + "External id": 519875,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174228.797, "dur": 0.056, + "args": { + "External id": 519876,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174229.356, "dur": 0.063, + "args": { + "External id": 519877,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174229.678, "dur": 0.053, + "args": { + "External id": 519878,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174230.131, "dur": 0.063, + "args": { + "External id": 519879,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174230.647, "dur": 0.091, + "args": { + "External id": 519880,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174231.171, "dur": 0.063, + "args": { + "External id": 519881,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174231.504, "dur": 0.070, + "args": { + "External id": 519882,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174231.924, "dur": 0.068, + "args": { + "External id": 519883,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174232.252, "dur": 0.061, + "args": { + "External id": 519884,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174232.964, "dur": 0.085, + "args": { + "External id": 519885,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174233.310, "dur": 0.274, + "args": { + "External id": 519886,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174234.266, "dur": 0.085, + "args": { + "External id": 519887,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174234.605, "dur": 0.289, + "args": { + "External id": 519888,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174235.247, "dur": 0.067, + "args": { + "External id": 519889,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174235.567, "dur": 0.062, + "args": { + "External id": 519890,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174236.037, "dur": 0.070, + "args": { + "External id": 519891,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174236.366, "dur": 0.266, + "args": { + "External id": 519892,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174237.019, "dur": 0.095, + "args": { + "External id": 519893,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174237.373, "dur": 0.071, + "args": { + "External id": 519894,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174237.854, "dur": 0.098, + "args": { + "External id": 519895,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174238.209, "dur": 0.052, + "args": { + "External id": 519896,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174238.655, "dur": 0.059, + "args": { + "External id": 519897,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174238.974, "dur": 0.049, + "args": { + "External id": 519898,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174239.421, "dur": 0.063, + "args": { + "External id": 519899,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174239.737, "dur": 0.050, + "args": { + "External id": 519900,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174240.176, "dur": 0.069, + "args": { + "External id": 519901,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174240.497, "dur": 0.050, + "args": { + "External id": 519902,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174240.997, "dur": 0.051, + "args": { + "External id": 519903,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174241.302, "dur": 0.052, + "args": { + "External id": 519904,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174241.640, "dur": 0.067, + "args": { + "External id": 519905,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174241.984, "dur": 0.054, + "args": { + "External id": 519906,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174242.431, "dur": 0.066, + "args": { + "External id": 519907,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174242.757, "dur": 0.050, + "args": { + "External id": 519908,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174243.280, "dur": 0.070, + "args": { + "External id": 519909,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174243.604, "dur": 0.050, + "args": { + "External id": 519910,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174243.967, "dur": 0.066, + "args": { + "External id": 519911,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174244.287, "dur": 0.054, + "args": { + "External id": 519912,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174244.743, "dur": 0.066, + "args": { + "External id": 519913,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174245.057, "dur": 0.050, + "args": { + "External id": 519914,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174245.389, "dur": 0.065, + "args": { + "External id": 519915,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174245.712, "dur": 0.054, + "args": { + "External id": 519916,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174246.101, "dur": 0.072, + "args": { + "External id": 519917,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174246.448, "dur": 0.056, + "args": { + "External id": 519918,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174246.910, "dur": 0.065, + "args": { + "External id": 519919,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174247.234, "dur": 0.053, + "args": { + "External id": 519920,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174247.727, "dur": 0.070, + "args": { + "External id": 519921,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174248.328, "dur": 0.068, + "args": { + "External id": 519922,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174248.850, "dur": 0.065, + "args": { + "External id": 519923,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174249.357, "dur": 0.065, + "args": { + "External id": 519924,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174249.806, "dur": 0.069, + "args": { + "External id": 519925,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174250.345, "dur": 0.070, + "args": { + "External id": 519926,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174250.798, "dur": 0.063, + "args": { + "External id": 519927,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174251.347, "dur": 0.067, + "args": { + "External id": 519928,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174251.841, "dur": 0.069, + "args": { + "External id": 519929,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174252.168, "dur": 0.053, + "args": { + "External id": 519930,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174252.547, "dur": 0.070, + "args": { + "External id": 519931,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174252.872, "dur": 0.054, + "args": { + "External id": 519932,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174253.319, "dur": 0.068, + "args": { + "External id": 519933,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174253.653, "dur": 0.056, + "args": { + "External id": 519934,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174254.113, "dur": 0.068, + "args": { + "External id": 519935,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174254.441, "dur": 0.053, + "args": { + "External id": 519936,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174254.847, "dur": 0.067, + "args": { + "External id": 519937,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174255.167, "dur": 0.054, + "args": { + "External id": 519938,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174255.629, "dur": 0.066, + "args": { + "External id": 519939,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174255.950, "dur": 0.055, + "args": { + "External id": 519940,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174256.409, "dur": 0.104, + "args": { + "External id": 519941,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174256.767, "dur": 0.083, + "args": { + "External id": 519942,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174257.198, "dur": 0.069, + "args": { + "External id": 519943,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174257.521, "dur": 0.087, + "args": { + "External id": 519944,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174258.014, "dur": 0.066, + "args": { + "External id": 519945,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174258.340, "dur": 0.053, + "args": { + "External id": 519946,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174258.783, "dur": 0.065, + "args": { + "External id": 519947,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174259.106, "dur": 0.052, + "args": { + "External id": 519948,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174259.551, "dur": 0.066, + "args": { + "External id": 519949,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174259.867, "dur": 0.050, + "args": { + "External id": 519950,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174260.324, "dur": 0.066, + "args": { + "External id": 519951,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174260.647, "dur": 0.050, + "args": { + "External id": 519952,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174261.101, "dur": 0.060, + "args": { + "External id": 519953,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174261.419, "dur": 0.048, + "args": { + "External id": 519954,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174261.892, "dur": 0.067, + "args": { + "External id": 519955,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174262.216, "dur": 0.049, + "args": { + "External id": 519956,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174262.677, "dur": 0.062, + "args": { + "External id": 519957,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174262.993, "dur": 0.101, + "args": { + "External id": 519958,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174263.517, "dur": 0.296, + "args": { + "External id": 519959,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174264.088, "dur": 0.305, + "args": { + "External id": 519960,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174264.753, "dur": 0.064, + "args": { + "External id": 519961,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174265.072, "dur": 0.071, + "args": { + "External id": 519962,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174265.515, "dur": 0.263, + "args": { + "External id": 519963,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174266.037, "dur": 0.260, + "args": { + "External id": 519964,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174266.694, "dur": 0.319, + "args": { + "External id": 519965,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174267.420, "dur": 0.283, + "args": { + "External id": 519966,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174268.095, "dur": 0.290, + "args": { + "External id": 519967,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174268.778, "dur": 0.259, + "args": { + "External id": 519968,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174269.392, "dur": 0.066, + "args": { + "External id": 519969,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174269.850, "dur": 0.066, + "args": { + "External id": 519970,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174270.177, "dur": 0.066, + "args": { + "External id": 519971,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174270.501, "dur": 0.053, + "args": { + "External id": 519972,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174270.979, "dur": 0.069, + "args": { + "External id": 519973,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174271.347, "dur": 0.059, + "args": { + "External id": 519974,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174271.940, "dur": 0.065, + "args": { + "External id": 519975,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174272.261, "dur": 0.051, + "args": { + "External id": 519976,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174273.120, "dur": 0.067, + "args": { + "External id": 519977,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174273.699, "dur": 0.066, + "args": { + "External id": 519978,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174274.493, "dur": 0.068, + "args": { + "External id": 519979,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174274.841, "dur": 0.056, + "args": { + "External id": 519980,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174275.533, "dur": 0.054, + "args": { + "External id": 519981,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174275.852, "dur": 0.053, + "args": { + "External id": 519982,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174276.390, "dur": 0.251, + "args": { + "External id": 519983,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174277.014, "dur": 0.065, + "args": { + "External id": 519984,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174277.788, "dur": 0.066, + "args": { + "External id": 519985,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174278.115, "dur": 0.052, + "args": { + "External id": 519986,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174279.067, "dur": 0.066, + "args": { + "External id": 519987,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174279.400, "dur": 0.052, + "args": { + "External id": 519988,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174280.059, "dur": 0.069, + "args": { + "External id": 519989,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174280.583, "dur": 0.071, + "args": { + "External id": 519990,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174281.219, "dur": 0.071, + "args": { + "External id": 519991,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174281.574, "dur": 0.054, + "args": { + "External id": 519992,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174282.285, "dur": 0.069, + "args": { + "External id": 519993,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174282.730, "dur": 0.072, + "args": { + "External id": 519994,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174283.550, "dur": 0.071, + "args": { + "External id": 519995,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174283.885, "dur": 0.054, + "args": { + "External id": 519996,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174284.506, "dur": 0.071, + "args": { + "External id": 519997,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174285.045, "dur": 0.054, + "args": { + "External id": 519998,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174285.939, "dur": 0.071, + "args": { + "External id": 519999,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174286.273, "dur": 0.057, + "args": { + "External id": 520000,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174286.831, "dur": 0.064, + "args": { + "External id": 520001,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174287.349, "dur": 0.065, + "args": { + "External id": 520002,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174288.070, "dur": 0.066, + "args": { + "External id": 520003,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174288.398, "dur": 0.056, + "args": { + "External id": 520004,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174289.150, "dur": 0.071, + "args": { + "External id": 520005,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174289.482, "dur": 0.055, + "args": { + "External id": 520006,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174290.253, "dur": 0.055, + "args": { + "External id": 520007,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174290.735, "dur": 0.058, + "args": { + "External id": 520008,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174291.528, "dur": 0.062, + "args": { + "External id": 520009,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174291.944, "dur": 0.069, + "args": { + "External id": 520010,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174292.814, "dur": 0.063, + "args": { + "External id": 520011,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174293.237, "dur": 0.064, + "args": { + "External id": 520012,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174293.974, "dur": 0.054, + "args": { + "External id": 520013,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174294.487, "dur": 0.068, + "args": { + "External id": 520014,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174295.183, "dur": 0.065, + "args": { + "External id": 520015,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174295.515, "dur": 0.052, + "args": { + "External id": 520016,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174296.323, "dur": 0.065, + "args": { + "External id": 520017,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174296.671, "dur": 0.048, + "args": { + "External id": 520018,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174297.366, "dur": 0.058, + "args": { + "External id": 520019,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174297.688, "dur": 0.051, + "args": { + "External id": 520020,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174298.334, "dur": 0.091, + "args": { + "External id": 520021,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174298.817, "dur": 0.102, + "args": { + "External id": 520022,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174299.408, "dur": 0.395, + "args": { + "External id": 520023,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174300.446, "dur": 0.284, + "args": { + "External id": 520024,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174301.218, "dur": 0.067, + "args": { + "External id": 520025,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174301.669, "dur": 0.085, + "args": { + "External id": 520026,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174302.321, "dur": 0.280, + "args": { + "External id": 520027,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174302.973, "dur": 0.270, + "args": { + "External id": 520028,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174303.989, "dur": 0.067, + "args": { + "External id": 520029,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174304.316, "dur": 0.054, + "args": { + "External id": 520030,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174304.956, "dur": 0.262, + "args": { + "External id": 520031,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174305.669, "dur": 0.090, + "args": { + "External id": 520032,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174306.536, "dur": 0.063, + "args": { + "External id": 520033,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174306.899, "dur": 0.067, + "args": { + "External id": 520034,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174307.604, "dur": 0.066, + "args": { + "External id": 520035,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174307.929, "dur": 0.054, + "args": { + "External id": 520036,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174308.699, "dur": 0.067, + "args": { + "External id": 520037,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338711, "tid": 2338711, + "ts": 6274147174309.028, "dur": 0.052, + "args": { + "External id": 520038,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338711, "tid": 2338711, + "ts": 6274147174885.100, "dur": 3568.752, + "args": { + "External id": 520039,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "1.6577051539811306e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338711, "tid": 2338711, + "ts": 6274147177258.599, "dur": 998.664, + "args": { + "External id": 520040,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "1.6577051539811306e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21932 + } + }, + { + "name": "process_name", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 0, + "args": { + "sort_index": 2338711 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 2379440, + "args": { + "name": "thread 2379440 (pt_autograd_5)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 2379440, + "args": { + "sort_index": 2379440 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 2379440, + "args": { + "name": "thread 2379440 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 2379440, + "args": { + "sort_index": 2379440 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 2338711, + "args": { + "name": "thread 2338711 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6274140629410.411, "pid": 2338711, "tid": 2338711, + "args": { + "sort_index": 2338711 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 6274140629344.866, "dur": 6556105.161, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6274140629344.866, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 6274140629344.866 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 6274147279618.627 + } + ], + "traceName": "exp/mtp.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/profile_trace/iteration_11776/rank5_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file